hydra-works 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/.rubocop.yml +0 -1
  4. data/README.md +9 -9
  5. data/Rakefile +12 -12
  6. data/hydra-works.gemspec +3 -3
  7. data/lib/hydra/works/models/characterization/fits_datastream.rb +1 -1
  8. data/lib/hydra/works/models/work.rb +4 -1
  9. data/lib/hydra/works/services/add_file_to_file_set.rb +5 -5
  10. data/lib/hydra/works/services/characterization_service.rb +2 -2
  11. data/lib/hydra/works/version.rb +1 -1
  12. data/lib/hydra/works.rb +1 -1
  13. data/solr/config/_rest_managed.json +3 -0
  14. data/solr/config/admin-extra.html +31 -0
  15. data/solr/config/elevate.xml +36 -0
  16. data/solr/config/mapping-ISOLatin1Accent.txt +246 -0
  17. data/solr/config/protwords.txt +21 -0
  18. data/solr/config/schema.xml +372 -0
  19. data/solr/config/scripts.conf +24 -0
  20. data/solr/config/solrconfig.xml +419 -0
  21. data/solr/config/spellings.txt +2 -0
  22. data/solr/config/stopwords.txt +58 -0
  23. data/solr/config/stopwords_en.txt +58 -0
  24. data/solr/config/synonyms.txt +31 -0
  25. data/solr/config/xslt/example.xsl +132 -0
  26. data/solr/config/xslt/example_atom.xsl +67 -0
  27. data/solr/config/xslt/example_rss.xsl +66 -0
  28. data/solr/config/xslt/luke.xsl +337 -0
  29. data/spec/hydra/works/models/collection_spec.rb +17 -17
  30. data/spec/hydra/works/models/file_set_spec.rb +8 -8
  31. data/spec/hydra/works/models/{generic_work_spec.rb → work_spec.rb} +57 -57
  32. data/spec/hydra/works/services/add_file_to_file_set_spec.rb +23 -23
  33. data/spec/hydra/works/services/upload_file_spec.rb +22 -22
  34. data/spec/hydra/works_spec.rb +18 -18
  35. data/spec/spec_helper.rb +3 -2
  36. metadata +36 -24
  37. data/config/solrconfig.xml +0 -223
  38. data/lib/hydra/works/models/generic_work.rb +0 -9
  39. data/lib/tasks/hydra-works_tasks.rake +0 -89
  40. data/lib/tasks/jetty.rake +0 -15
@@ -0,0 +1,419 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is a stripped down config file used for a simple example...
21
+ It is *not* a good example to work from.
22
+ -->
23
+ <config>
24
+
25
+ <!-- Controls what version of Lucene various components of Solr
26
+ adhere to. Generally, you want to use the latest version to
27
+ get all bug fixes and improvements. It is highly recommended
28
+ that you fully re-index after changing this setting as it can
29
+ affect both how text is indexed and queried.
30
+ -->
31
+ <luceneMatchVersion>5.0.0</luceneMatchVersion>
32
+
33
+ <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" />
34
+ <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs" />
35
+
36
+ <directoryFactory name="DirectoryFactory"
37
+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
38
+ </directoryFactory>
39
+
40
+ <codecFactory class="solr.SchemaCodecFactory"/>
41
+
42
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
43
+
44
+
45
+ <dataDir>${solr.blacklight-core.data.dir:}</dataDir>
46
+
47
+ <requestDispatcher handleSelect="true" >
48
+ <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
49
+ </requestDispatcher>
50
+
51
+ <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
52
+
53
+ <!-- config for the admin interface -->
54
+ <admin>
55
+ <defaultQuery>*:*</defaultQuery>
56
+ </admin>
57
+
58
+ <!-- SearchHandler
59
+
60
+ http://wiki.apache.org/solr/SearchHandler
61
+
62
+ For processing Search Queries, the primary Request Handler
63
+ provided with Solr is "SearchHandler". It delegates to a sequence
64
+ of SearchComponents (see below) and supports distributed
65
+ queries across multiple shards
66
+ -->
67
+ <requestHandler name="search" class="solr.SearchHandler" default="true">
68
+ <!-- default values for query parameters can be specified, these
69
+ will be overridden by parameters in the request
70
+ -->
71
+ <lst name="defaults">
72
+ <str name="defType">dismax</str>
73
+ <str name="echoParams">explicit</str>
74
+ <int name="rows">10</int>
75
+
76
+ <str name="q.alt">*:*</str>
77
+ <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
78
+
79
+ <!-- these qf and pf values are used by default, unless otherwise specified by the
80
+ client. The default blacklight_config will use these for the
81
+ "keywords" search. See the author_qf/author_pf, title_qf, etc
82
+ below, which the default blacklight_config will specify for
83
+ those searches. You may also be interested in:
84
+ http://wiki.apache.org/solr/LocalParams
85
+ -->
86
+
87
+ <str name="qf">
88
+ title_unstem_search^100000
89
+ subtitle_unstem_search^50000
90
+ title_t^25000
91
+ subtitle_t^10000
92
+ title_addl_unstem_search^5000
93
+ title_addl_t^2500
94
+ title_added_entry_unstem_search^1500
95
+ title_added_entry_t^1250
96
+ subject_topic_unstem_search^1000
97
+ subject_unstem_search^750
98
+ subject_topic_facet^625
99
+ subject_t^500
100
+ author_unstem_search^250
101
+ author_addl_unstem_search^250
102
+ author_t^100
103
+ author_addl_t^50
104
+ subject_addl_unstem_search^250
105
+ subject_addl_t^50
106
+ title_series_unstem_search^25
107
+ title_series_t^10
108
+ isbn_t
109
+ text
110
+ </str>
111
+ <str name="pf">
112
+ title_unstem_search^1000000
113
+ subtitle_unstem_search^500000
114
+ title_t^250000
115
+ subtitle_t^100000
116
+ title_addl_unstem_search^50000
117
+ title_addl_t^25000
118
+ title_added_entry_unstem_search^15000
119
+ title_added_entry_t^12500
120
+ subject_topic_unstem_search^10000
121
+ subject_unstem_search^7500
122
+ subject_topic_facet^6250
123
+ subject_t^5000
124
+ author_unstem_search^2500
125
+ author_addl_unstem_search^2500
126
+ author_t^1000
127
+ author_addl_t^500
128
+ subject_addl_unstem_search^2500
129
+ subject_addl_t^500
130
+ title_series_unstem_search^250
131
+ title_series_t^100
132
+ text^10
133
+ </str>
134
+ <str name="author_qf">
135
+ author_unstem_search^200
136
+ author_addl_unstem_search^50
137
+ author_t^20
138
+ author_addl_t
139
+ </str>
140
+ <str name="author_pf">
141
+ author_unstem_search^2000
142
+ author_addl_unstem_search^500
143
+ author_t^200
144
+ author_addl_t^10
145
+ </str>
146
+ <str name="title_qf">
147
+ title_unstem_search^50000
148
+ subtitle_unstem_search^25000
149
+ title_addl_unstem_search^10000
150
+ title_t^5000
151
+ subtitle_t^2500
152
+ title_addl_t^100
153
+ title_added_entry_unstem_search^50
154
+ title_added_entry_t^10
155
+ title_series_unstem_search^5
156
+ title_series_t
157
+ </str>
158
+ <str name="title_pf">
159
+ title_unstem_search^500000
160
+ subtitle_unstem_search^250000
161
+ title_addl_unstem_search^100000
162
+ title_t^50000
163
+ subtitle_t^25000
164
+ title_addl_t^1000
165
+ title_added_entry_unstem_search^500
166
+ title_added_entry_t^100
167
+ title_series_t^50
168
+ title_series_unstem_search^10
169
+ </str>
170
+ <str name="subject_qf">
171
+ subject_topic_unstem_search^200
172
+ subject_unstem_search^125
173
+ subject_topic_facet^100
174
+ subject_t^50
175
+ subject_addl_unstem_search^10
176
+ subject_addl_t
177
+ </str>
178
+ <str name="subject_pf">
179
+ subject_topic_unstem_search^2000
180
+ subject_unstem_search^1250
181
+ subject_t^1000
182
+ subject_topic_facet^500
183
+ subject_addl_unstem_search^100
184
+ subject_addl_t^10
185
+ </str>
186
+
187
+ <int name="ps">3</int>
188
+ <float name="tie">0.01</float>
189
+
190
+ <!-- NOT using marc_display because it is large and will slow things down for search results -->
191
+ <str name="fl">
192
+ id,
193
+ score,
194
+ author_display,
195
+ author_vern_display,
196
+ format,
197
+ isbn_t,
198
+ language_facet,
199
+ lc_callnum_display,
200
+ material_type_display,
201
+ published_display,
202
+ published_vern_display,
203
+ pub_date,
204
+ title_display,
205
+ title_vern_display,
206
+ subject_topic_facet,
207
+ subject_geo_facet,
208
+ subject_era_facet,
209
+ subtitle_display,
210
+ subtitle_vern_display,
211
+ url_fulltext_display,
212
+ url_suppl_display,
213
+ </str>
214
+
215
+ <str name="facet">true</str>
216
+ <str name="facet.mincount">1</str>
217
+ <str name="facet.limit">10</str>
218
+ <str name="facet.field">format</str>
219
+ <str name="facet.field">lc_1letter_facet</str>
220
+ <str name="facet.field">lc_alpha_facet</str>
221
+ <str name="facet.field">lc_b4cutter_facet</str>
222
+ <str name="facet.field">language_facet</str>
223
+ <str name="facet.field">pub_date</str>
224
+ <str name="facet.field">subject_era_facet</str>
225
+ <str name="facet.field">subject_geo_facet</str>
226
+ <str name="facet.field">subject_topic_facet</str>
227
+
228
+ <str name="spellcheck">true</str>
229
+ <str name="spellcheck.dictionary">default</str>
230
+ <str name="spellcheck.onlyMorePopular">true</str>
231
+ <str name="spellcheck.extendedResults">true</str>
232
+ <str name="spellcheck.collate">false</str>
233
+ <str name="spellcheck.count">5</str>
234
+
235
+ </lst>
236
+ <!-- In addition to defaults, "appends" params can be specified
237
+ to identify values which should be appended to the list of
238
+ multi-val params from the query (or the existing "defaults").
239
+ -->
240
+ <!-- In this example, the param "fq=inStock:true" would be appended to
241
+ any query time fq params the user may specify, as a mechanism for
242
+ partitioning the index, independent of any user selected filtering
243
+ that may also be desired (perhaps as a result of faceted searching).
244
+
245
+ NOTE: there is *absolutely* nothing a client can do to prevent these
246
+ "appends" values from being used, so don't use this mechanism
247
+ unless you are sure you always want it.
248
+ -->
249
+ <!--
250
+ <lst name="appends">
251
+ <str name="fq">inStock:true</str>
252
+ </lst>
253
+ -->
254
+ <!-- "invariants" are a way of letting the Solr maintainer lock down
255
+ the options available to Solr clients. Any params values
256
+ specified here are used regardless of what values may be specified
257
+ in either the query, the "defaults", or the "appends" params.
258
+
259
+ In this example, the facet.field and facet.query params would
260
+ be fixed, limiting the facets clients can use. Faceting is
261
+ not turned on by default - but if the client does specify
262
+ facet=true in the request, these are the only facets they
263
+ will be able to see counts for; regardless of what other
264
+ facet.field or facet.query params they may specify.
265
+
266
+ NOTE: there is *absolutely* nothing a client can do to prevent these
267
+ "invariants" values from being used, so don't use this mechanism
268
+ unless you are sure you always want it.
269
+ -->
270
+ <!--
271
+ <lst name="invariants">
272
+ <str name="facet.field">cat</str>
273
+ <str name="facet.field">manu_exact</str>
274
+ <str name="facet.query">price:[* TO 500]</str>
275
+ <str name="facet.query">price:[500 TO *]</str>
276
+ </lst>
277
+ -->
278
+ <!-- If the default list of SearchComponents is not desired, that
279
+ list can either be overridden completely, or components can be
280
+ prepended or appended to the default list. (see below)
281
+ -->
282
+ <!--
283
+ <arr name="components">
284
+ <str>nameOfCustomComponent1</str>
285
+ <str>nameOfCustomComponent2</str>
286
+ </arr>
287
+ -->
288
+ <arr name="last-components">
289
+ <str>spellcheck</str>
290
+ </arr>
291
+
292
+ </requestHandler>
293
+
294
+ <requestHandler name="standard" class="solr.SearchHandler">
295
+ <lst name="defaults">
296
+ <str name="echoParams">explicit</str>
297
+ <str name="defType">lucene</str>
298
+ </lst>
299
+ </requestHandler>
300
+
301
+ <!-- for requests to get a single document; use id=666 instead of q=id:666 -->
302
+ <requestHandler name="document" class="solr.SearchHandler" >
303
+ <lst name="defaults">
304
+ <str name="echoParams">all</str>
305
+ <str name="fl">*</str>
306
+ <str name="rows">1</str>
307
+ <str name="q">{!term f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
308
+ </lst>
309
+ </requestHandler>
310
+
311
+ <!-- Spell Check
312
+
313
+ The spell check component can return a list of alternative spelling
314
+ suggestions.
315
+
316
+ http://wiki.apache.org/solr/SpellCheckComponent
317
+ -->
318
+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
319
+
320
+ <str name="queryAnalyzerFieldType">textSpell</str>
321
+
322
+ <!-- Multiple "Spell Checkers" can be declared and used by this
323
+ component
324
+ -->
325
+
326
+ <!-- a spellchecker built from a field of the main index, and
327
+ written to disk
328
+ -->
329
+ <lst name="spellchecker">
330
+ <str name="name">default</str>
331
+ <str name="field">spell</str>
332
+ <str name="spellcheckIndexDir">./spell</str>
333
+ <str name="buildOnOptimize">true</str>
334
+ </lst>
335
+ <lst name="spellchecker">
336
+ <str name="name">author</str>
337
+ <str name="field">author_spell</str>
338
+ <str name="spellcheckIndexDir">./spell_author</str>
339
+ <str name="accuracy">0.7</str>
340
+ <str name="buildOnOptimize">true</str>
341
+ </lst>
342
+ <lst name="spellchecker">
343
+ <str name="name">subject</str>
344
+ <str name="field">subject_spell</str>
345
+ <str name="spellcheckIndexDir">./spell_subject</str>
346
+ <str name="accuracy">0.7</str>
347
+ <str name="buildOnOptimize">true</str>
348
+ </lst>
349
+ <lst name="spellchecker">
350
+ <str name="name">title</str>
351
+ <str name="field">title_spell</str>
352
+ <str name="spellcheckIndexDir">./spell_title</str>
353
+ <str name="accuracy">0.7</str>
354
+ <str name="buildOnOptimize">true</str>
355
+ </lst>
356
+
357
+ <!-- a spellchecker that uses a different distance measure -->
358
+ <!--
359
+ <lst name="spellchecker">
360
+ <str name="name">jarowinkler</str>
361
+ <str name="field">spell</str>
362
+ <str name="distanceMeasure">
363
+ org.apache.lucene.search.spell.JaroWinklerDistance
364
+ </str>
365
+ <str name="spellcheckIndexDir">spellcheckerJaro</str>
366
+ </lst>
367
+ -->
368
+
369
+ <!-- a spellchecker that uses an alternate comparator
370
+
371
+ comparatorClass can be one of:
372
+ 1. score (default)
373
+ 2. freq (Frequency first, then score)
374
+ 3. A fully qualified class name
375
+ -->
376
+ <!--
377
+ <lst name="spellchecker">
378
+ <str name="name">freq</str>
379
+ <str name="field">lowerfilt</str>
380
+ <str name="spellcheckIndexDir">spellcheckerFreq</str>
381
+ <str name="comparatorClass">freq</str>
382
+ <str name="buildOnCommit">true</str></lst>
383
+ -->
384
+
385
+ <!-- A spellchecker that reads the list of words from a file -->
386
+ <!--
387
+ <lst name="spellchecker">
388
+ <str name="classname">solr.FileBasedSpellChecker</str>
389
+ <str name="name">file</str>
390
+ <str name="sourceLocation">spellings.txt</str>
391
+ <str name="characterEncoding">UTF-8</str>
392
+ <str name="spellcheckIndexDir">spellcheckerFile</str>
393
+ </lst>
394
+ -->
395
+ </searchComponent>
396
+
397
+ <searchComponent name="suggest" class="solr.SuggestComponent">
398
+ <lst name="suggester">
399
+ <str name="name">mySuggester</str>
400
+ <str name="lookupImpl">FuzzyLookupFactory</str>
401
+ <str name="suggestAnalyzerFieldType">textSuggest</str>
402
+ <str name="buildOnCommit">true</str>
403
+ <str name="field">suggest</str>
404
+ </lst>
405
+ </searchComponent>
406
+
407
+ <requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
408
+ <lst name="defaults">
409
+ <str name="suggest">true</str>
410
+ <str name="suggest.count">5</str>
411
+ <str name="suggest.dictionary">mySuggester</str>
412
+ </lst>
413
+ <arr name="components">
414
+ <str>suggest</str>
415
+ </arr>
416
+ </requestHandler>
417
+
418
+ </config>
419
+
@@ -0,0 +1,2 @@
1
+ pizza
2
+ history
@@ -0,0 +1,58 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ #-----------------------------------------------------------------------
17
+ # a couple of test stopwords to test that the words are really being
18
+ # configured from this file:
19
+ stopworda
20
+ stopwordb
21
+
22
+ #Standard english stop words taken from Lucene's StopAnalyzer
23
+ a
24
+ an
25
+ and
26
+ are
27
+ as
28
+ at
29
+ be
30
+ but
31
+ by
32
+ for
33
+ if
34
+ in
35
+ into
36
+ is
37
+ it
38
+ no
39
+ not
40
+ of
41
+ on
42
+ or
43
+ s
44
+ such
45
+ t
46
+ that
47
+ the
48
+ their
49
+ then
50
+ there
51
+ these
52
+ they
53
+ this
54
+ to
55
+ was
56
+ will
57
+ with
58
+
@@ -0,0 +1,58 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ #-----------------------------------------------------------------------
17
+ # a couple of test stopwords to test that the words are really being
18
+ # configured from this file:
19
+ stopworda
20
+ stopwordb
21
+
22
+ #Standard english stop words taken from Lucene's StopAnalyzer
23
+ a
24
+ an
25
+ and
26
+ are
27
+ as
28
+ at
29
+ be
30
+ but
31
+ by
32
+ for
33
+ if
34
+ in
35
+ into
36
+ is
37
+ it
38
+ no
39
+ not
40
+ of
41
+ on
42
+ or
43
+ s
44
+ such
45
+ t
46
+ that
47
+ the
48
+ their
49
+ then
50
+ there
51
+ these
52
+ they
53
+ this
54
+ to
55
+ was
56
+ will
57
+ with
58
+
@@ -0,0 +1,31 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ #some test synonym mappings unlikely to appear in real input text
15
+ aaa => aaaa
16
+ bbb => bbbb1 bbbb2
17
+ ccc => cccc1,cccc2
18
+ a\=>a => b\=>b
19
+ a\,a => b\,b
20
+ fooaaa,baraaa,bazaaa
21
+
22
+ # Some synonym groups specific to this example
23
+ GB,gib,gigabyte,gigabytes
24
+ MB,mib,megabyte,megabytes
25
+ Television, Televisions, TV, TVs
26
+ #notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
27
+ #after us won't split it into two words.
28
+
29
+ # Synonym mappings can be used for spelling correction too
30
+ pixima => pixma
31
+