hydra-works 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/.rubocop.yml +0 -1
  4. data/README.md +9 -9
  5. data/Rakefile +12 -12
  6. data/hydra-works.gemspec +3 -3
  7. data/lib/hydra/works/models/characterization/fits_datastream.rb +1 -1
  8. data/lib/hydra/works/models/work.rb +4 -1
  9. data/lib/hydra/works/services/add_file_to_file_set.rb +5 -5
  10. data/lib/hydra/works/services/characterization_service.rb +2 -2
  11. data/lib/hydra/works/version.rb +1 -1
  12. data/lib/hydra/works.rb +1 -1
  13. data/solr/config/_rest_managed.json +3 -0
  14. data/solr/config/admin-extra.html +31 -0
  15. data/solr/config/elevate.xml +36 -0
  16. data/solr/config/mapping-ISOLatin1Accent.txt +246 -0
  17. data/solr/config/protwords.txt +21 -0
  18. data/solr/config/schema.xml +372 -0
  19. data/solr/config/scripts.conf +24 -0
  20. data/solr/config/solrconfig.xml +419 -0
  21. data/solr/config/spellings.txt +2 -0
  22. data/solr/config/stopwords.txt +58 -0
  23. data/solr/config/stopwords_en.txt +58 -0
  24. data/solr/config/synonyms.txt +31 -0
  25. data/solr/config/xslt/example.xsl +132 -0
  26. data/solr/config/xslt/example_atom.xsl +67 -0
  27. data/solr/config/xslt/example_rss.xsl +66 -0
  28. data/solr/config/xslt/luke.xsl +337 -0
  29. data/spec/hydra/works/models/collection_spec.rb +17 -17
  30. data/spec/hydra/works/models/file_set_spec.rb +8 -8
  31. data/spec/hydra/works/models/{generic_work_spec.rb → work_spec.rb} +57 -57
  32. data/spec/hydra/works/services/add_file_to_file_set_spec.rb +23 -23
  33. data/spec/hydra/works/services/upload_file_spec.rb +22 -22
  34. data/spec/hydra/works_spec.rb +18 -18
  35. data/spec/spec_helper.rb +3 -2
  36. metadata +36 -24
  37. data/config/solrconfig.xml +0 -223
  38. data/lib/hydra/works/models/generic_work.rb +0 -9
  39. data/lib/tasks/hydra-works_tasks.rake +0 -89
  40. data/lib/tasks/jetty.rake +0 -15
@@ -0,0 +1,419 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is a stripped down config file used for a simple example...
21
+ It is *not* a good example to work from.
22
+ -->
23
+ <config>
24
+
25
+ <!-- Controls what version of Lucene various components of Solr
26
+ adhere to. Generally, you want to use the latest version to
27
+ get all bug fixes and improvements. It is highly recommended
28
+ that you fully re-index after changing this setting as it can
29
+ affect both how text is indexed and queried.
30
+ -->
31
+ <luceneMatchVersion>5.0.0</luceneMatchVersion>
32
+
33
+ <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" />
34
+ <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs" />
35
+
36
+ <directoryFactory name="DirectoryFactory"
37
+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
38
+ </directoryFactory>
39
+
40
+ <codecFactory class="solr.SchemaCodecFactory"/>
41
+
42
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
43
+
44
+
45
+ <dataDir>${solr.blacklight-core.data.dir:}</dataDir>
46
+
47
+ <requestDispatcher handleSelect="true" >
48
+ <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
49
+ </requestDispatcher>
50
+
51
+ <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
52
+
53
+ <!-- config for the admin interface -->
54
+ <admin>
55
+ <defaultQuery>*:*</defaultQuery>
56
+ </admin>
57
+
58
+ <!-- SearchHandler
59
+
60
+ http://wiki.apache.org/solr/SearchHandler
61
+
62
+ For processing Search Queries, the primary Request Handler
63
+ provided with Solr is "SearchHandler". It delegates to a sequence
64
+ of SearchComponents (see below) and supports distributed
65
+ queries across multiple shards
66
+ -->
67
+ <requestHandler name="search" class="solr.SearchHandler" default="true">
68
+ <!-- default values for query parameters can be specified, these
69
+ will be overridden by parameters in the request
70
+ -->
71
+ <lst name="defaults">
72
+ <str name="defType">dismax</str>
73
+ <str name="echoParams">explicit</str>
74
+ <int name="rows">10</int>
75
+
76
+ <str name="q.alt">*:*</str>
77
+ <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
78
+
79
+ <!-- these qf and pf values are used by default, if not otherwise specified by
80
+ client. The default blacklight_config will use these for the
81
+ "keywords" search. See the author_qf/author_pf, title_qf, etc
82
+ below, which the default blacklight_config will specify for
83
+ those searches. You may also be interested in:
84
+ http://wiki.apache.org/solr/LocalParams
85
+ -->
86
+
87
+ <str name="qf">
88
+ title_unstem_search^100000
89
+ subtitle_unstem_search^50000
90
+ title_t^25000
91
+ subtitle_t^10000
92
+ title_addl_unstem_search^5000
93
+ title_addl_t^2500
94
+ title_added_entry_unstem_search^1500
95
+ title_added_entry_t^1250
96
+ subject_topic_unstem_search^1000
97
+ subject_unstem_search^750
98
+ subject_topic_facet^625
99
+ subject_t^500
100
+ author_unstem_search^250
101
+ author_addl_unstem_search^250
102
+ author_t^100
103
+ author_addl_t^50
104
+ subject_addl_unstem_search^250
105
+ subject_addl_t^50
106
+ title_series_unstem_search^25
107
+ title_series_t^10
108
+ isbn_t
109
+ text
110
+ </str>
111
+ <str name="pf">
112
+ title_unstem_search^1000000
113
+ subtitle_unstem_search^500000
114
+ title_t^250000
115
+ subtitle_t^100000
116
+ title_addl_unstem_search^50000
117
+ title_addl_t^25000
118
+ title_added_entry_unstem_search^15000
119
+ title_added_entry_t^12500
120
+ subject_topic_unstem_search^10000
121
+ subject_unstem_search^7500
122
+ subject_topic_facet^6250
123
+ subject_t^5000
124
+ author_unstem_search^2500
125
+ author_addl_unstem_search^2500
126
+ author_t^1000
127
+ author_addl_t^500
128
+ subject_addl_unstem_search^2500
129
+ subject_addl_t^500
130
+ title_series_unstem_search^250
131
+ title_series_t^100
132
+ text^10
133
+ </str>
134
+ <str name="author_qf">
135
+ author_unstem_search^200
136
+ author_addl_unstem_search^50
137
+ author_t^20
138
+ author_addl_t
139
+ </str>
140
+ <str name="author_pf">
141
+ author_unstem_search^2000
142
+ author_addl_unstem_search^500
143
+ author_t^200
144
+ author_addl_t^10
145
+ </str>
146
+ <str name="title_qf">
147
+ title_unstem_search^50000
148
+ subtitle_unstem_search^25000
149
+ title_addl_unstem_search^10000
150
+ title_t^5000
151
+ subtitle_t^2500
152
+ title_addl_t^100
153
+ title_added_entry_unstem_search^50
154
+ title_added_entry_t^10
155
+ title_series_unstem_search^5
156
+ title_series_t
157
+ </str>
158
+ <str name="title_pf">
159
+ title_unstem_search^500000
160
+ subtitle_unstem_search^250000
161
+ title_addl_unstem_search^100000
162
+ title_t^50000
163
+ subtitle_t^25000
164
+ title_addl_t^1000
165
+ title_added_entry_unstem_search^500
166
+ title_added_entry_t^100
167
+ title_series_t^50
168
+ title_series_unstem_search^10
169
+ </str>
170
+ <str name="subject_qf">
171
+ subject_topic_unstem_search^200
172
+ subject_unstem_search^125
173
+ subject_topic_facet^100
174
+ subject_t^50
175
+ subject_addl_unstem_search^10
176
+ subject_addl_t
177
+ </str>
178
+ <str name="subject_pf">
179
+ subject_topic_unstem_search^2000
180
+ subject_unstem_search^1250
181
+ subject_t^1000
182
+ subject_topic_facet^500
183
+ subject_addl_unstem_search^100
184
+ subject_addl_t^10
185
+ </str>
186
+
187
+ <int name="ps">3</int>
188
+ <float name="tie">0.01</float>
189
+
190
+ <!-- NOT using marc_display because it is large and will slow things down for search results -->
191
+ <str name="fl">
192
+ id,
193
+ score,
194
+ author_display,
195
+ author_vern_display,
196
+ format,
197
+ isbn_t,
198
+ language_facet,
199
+ lc_callnum_display,
200
+ material_type_display,
201
+ published_display,
202
+ published_vern_display,
203
+ pub_date,
204
+ title_display,
205
+ title_vern_display,
206
+ subject_topic_facet,
207
+ subject_geo_facet,
208
+ subject_era_facet,
209
+ subtitle_display,
210
+ subtitle_vern_display,
211
+ url_fulltext_display,
212
+ url_suppl_display,
213
+ </str>
214
+
215
+ <str name="facet">true</str>
216
+ <str name="facet.mincount">1</str>
217
+ <str name="facet.limit">10</str>
218
+ <str name="facet.field">format</str>
219
+ <str name="facet.field">lc_1letter_facet</str>
220
+ <str name="facet.field">lc_alpha_facet</str>
221
+ <str name="facet.field">lc_b4cutter_facet</str>
222
+ <str name="facet.field">language_facet</str>
223
+ <str name="facet.field">pub_date</str>
224
+ <str name="facet.field">subject_era_facet</str>
225
+ <str name="facet.field">subject_geo_facet</str>
226
+ <str name="facet.field">subject_topic_facet</str>
227
+
228
+ <str name="spellcheck">true</str>
229
+ <str name="spellcheck.dictionary">default</str>
230
+ <str name="spellcheck.onlyMorePopular">true</str>
231
+ <str name="spellcheck.extendedResults">true</str>
232
+ <str name="spellcheck.collate">false</str>
233
+ <str name="spellcheck.count">5</str>
234
+
235
+ </lst>
236
+ <!-- In addition to defaults, "appends" params can be specified
237
+ to identify values which should be appended to the list of
238
+ multi-val params from the query (or the existing "defaults").
239
+ -->
240
+ <!-- In this example, the param "fq=inStock:true" would be appended to
241
+ any query time fq params the user may specify, as a mechanism for
242
+ partitioning the index, independent of any user selected filtering
243
+ that may also be desired (perhaps as a result of faceted searching).
244
+
245
+ NOTE: there is *absolutely* nothing a client can do to prevent these
246
+ "appends" values from being used, so don't use this mechanism
247
+ unless you are sure you always want it.
248
+ -->
249
+ <!--
250
+ <lst name="appends">
251
+ <str name="fq">inStock:true</str>
252
+ </lst>
253
+ -->
254
+ <!-- "invariants" are a way of letting the Solr maintainer lock down
255
+ the options available to Solr clients. Any params values
256
+ specified here are used regardless of what values may be specified
257
+ in either the query, the "defaults", or the "appends" params.
258
+
259
+ In this example, the facet.field and facet.query params would
260
+ be fixed, limiting the facets clients can use. Faceting is
261
+ not turned on by default - but if the client does specify
262
+ facet=true in the request, these are the only facets they
263
+ will be able to see counts for; regardless of what other
264
+ facet.field or facet.query params they may specify.
265
+
266
+ NOTE: there is *absolutely* nothing a client can do to prevent these
267
+ "invariants" values from being used, so don't use this mechanism
268
+ unless you are sure you always want it.
269
+ -->
270
+ <!--
271
+ <lst name="invariants">
272
+ <str name="facet.field">cat</str>
273
+ <str name="facet.field">manu_exact</str>
274
+ <str name="facet.query">price:[* TO 500]</str>
275
+ <str name="facet.query">price:[500 TO *]</str>
276
+ </lst>
277
+ -->
278
+ <!-- If the default list of SearchComponents is not desired, that
279
+ list can either be overridden completely, or components can be
280
+ prepended or appended to the default list. (see below)
281
+ -->
282
+ <!--
283
+ <arr name="components">
284
+ <str>nameOfCustomComponent1</str>
285
+ <str>nameOfCustomComponent2</str>
286
+ </arr>
287
+ -->
288
+ <arr name="last-components">
289
+ <str>spellcheck</str>
290
+ </arr>
291
+
292
+ </requestHandler>
293
+
294
+ <requestHandler name="standard" class="solr.SearchHandler">
295
+ <lst name="defaults">
296
+ <str name="echoParams">explicit</str>
297
+ <str name="defType">lucene</str>
298
+ </lst>
299
+ </requestHandler>
300
+
301
+ <!-- for requests to get a single document; use id=666 instead of q=id:666 -->
302
+ <requestHandler name="document" class="solr.SearchHandler" >
303
+ <lst name="defaults">
304
+ <str name="echoParams">all</str>
305
+ <str name="fl">*</str>
306
+ <str name="rows">1</str>
307
+ <str name="q">{!term f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
308
+ </lst>
309
+ </requestHandler>
310
+
311
+ <!-- Spell Check
312
+
313
+ The spell check component can return a list of alternative spelling
314
+ suggestions.
315
+
316
+ http://wiki.apache.org/solr/SpellCheckComponent
317
+ -->
318
+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
319
+
320
+ <str name="queryAnalyzerFieldType">textSpell</str>
321
+
322
+ <!-- Multiple "Spell Checkers" can be declared and used by this
323
+ component
324
+ -->
325
+
326
+ <!-- a spellchecker built from a field of the main index, and
327
+ written to disk
328
+ -->
329
+ <lst name="spellchecker">
330
+ <str name="name">default</str>
331
+ <str name="field">spell</str>
332
+ <str name="spellcheckIndexDir">./spell</str>
333
+ <str name="buildOnOptimize">true</str>
334
+ </lst>
335
+ <lst name="spellchecker">
336
+ <str name="name">author</str>
337
+ <str name="field">author_spell</str>
338
+ <str name="spellcheckIndexDir">./spell_author</str>
339
+ <str name="accuracy">0.7</str>
340
+ <str name="buildOnOptimize">true</str>
341
+ </lst>
342
+ <lst name="spellchecker">
343
+ <str name="name">subject</str>
344
+ <str name="field">subject_spell</str>
345
+ <str name="spellcheckIndexDir">./spell_subject</str>
346
+ <str name="accuracy">0.7</str>
347
+ <str name="buildOnOptimize">true</str>
348
+ </lst>
349
+ <lst name="spellchecker">
350
+ <str name="name">title</str>
351
+ <str name="field">title_spell</str>
352
+ <str name="spellcheckIndexDir">./spell_title</str>
353
+ <str name="accuracy">0.7</str>
354
+ <str name="buildOnOptimize">true</str>
355
+ </lst>
356
+
357
+ <!-- a spellchecker that uses a different distance measure -->
358
+ <!--
359
+ <lst name="spellchecker">
360
+ <str name="name">jarowinkler</str>
361
+ <str name="field">spell</str>
362
+ <str name="distanceMeasure">
363
+ org.apache.lucene.search.spell.JaroWinklerDistance
364
+ </str>
365
+ <str name="spellcheckIndexDir">spellcheckerJaro</str>
366
+ </lst>
367
+ -->
368
+
369
+ <!-- a spellchecker that uses an alternate comparator
370
+
371
+ comparatorClass can be one of:
372
+ 1. score (default)
373
+ 2. freq (Frequency first, then score)
374
+ 3. A fully qualified class name
375
+ -->
376
+ <!--
377
+ <lst name="spellchecker">
378
+ <str name="name">freq</str>
379
+ <str name="field">lowerfilt</str>
380
+ <str name="spellcheckIndexDir">spellcheckerFreq</str>
381
+ <str name="comparatorClass">freq</str>
382
+ <str name="buildOnCommit">true</str>
383
+ </lst> -->
384
+
385
+ <!-- A spellchecker that reads the list of words from a file -->
386
+ <!--
387
+ <lst name="spellchecker">
388
+ <str name="classname">solr.FileBasedSpellChecker</str>
389
+ <str name="name">file</str>
390
+ <str name="sourceLocation">spellings.txt</str>
391
+ <str name="characterEncoding">UTF-8</str>
392
+ <str name="spellcheckIndexDir">spellcheckerFile</str>
393
+ </lst>
394
+ -->
395
+ </searchComponent>
396
+
397
+ <searchComponent name="suggest" class="solr.SuggestComponent">
398
+ <lst name="suggester">
399
+ <str name="name">mySuggester</str>
400
+ <str name="lookupImpl">FuzzyLookupFactory</str>
401
+ <str name="suggestAnalyzerFieldType">textSuggest</str>
402
+ <str name="buildOnCommit">true</str>
403
+ <str name="field">suggest</str>
404
+ </lst>
405
+ </searchComponent>
406
+
407
+ <requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
408
+ <lst name="defaults">
409
+ <str name="suggest">true</str>
410
+ <str name="suggest.count">5</str>
411
+ <str name="suggest.dictionary">mySuggester</str>
412
+ </lst>
413
+ <arr name="components">
414
+ <str>suggest</str>
415
+ </arr>
416
+ </requestHandler>
417
+
418
+ </config>
419
+
@@ -0,0 +1,2 @@
1
+ pizza
2
+ history
@@ -0,0 +1,58 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ #-----------------------------------------------------------------------
17
+ # a couple of test stopwords to test that the words are really being
18
+ # configured from this file:
19
+ stopworda
20
+ stopwordb
21
+
22
+ #Standard English stop words taken from Lucene's StopAnalyzer
23
+ a
24
+ an
25
+ and
26
+ are
27
+ as
28
+ at
29
+ be
30
+ but
31
+ by
32
+ for
33
+ if
34
+ in
35
+ into
36
+ is
37
+ it
38
+ no
39
+ not
40
+ of
41
+ on
42
+ or
43
+ s
44
+ such
45
+ t
46
+ that
47
+ the
48
+ their
49
+ then
50
+ there
51
+ these
52
+ they
53
+ this
54
+ to
55
+ was
56
+ will
57
+ with
58
+
@@ -0,0 +1,58 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ #-----------------------------------------------------------------------
17
+ # a couple of test stopwords to test that the words are really being
18
+ # configured from this file:
19
+ stopworda
20
+ stopwordb
21
+
22
+ #Standard English stop words taken from Lucene's StopAnalyzer
23
+ a
24
+ an
25
+ and
26
+ are
27
+ as
28
+ at
29
+ be
30
+ but
31
+ by
32
+ for
33
+ if
34
+ in
35
+ into
36
+ is
37
+ it
38
+ no
39
+ not
40
+ of
41
+ on
42
+ or
43
+ s
44
+ such
45
+ t
46
+ that
47
+ the
48
+ their
49
+ then
50
+ there
51
+ these
52
+ they
53
+ this
54
+ to
55
+ was
56
+ will
57
+ with
58
+
@@ -0,0 +1,31 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ #some test synonym mappings unlikely to appear in real input text
15
+ aaa => aaaa
16
+ bbb => bbbb1 bbbb2
17
+ ccc => cccc1,cccc2
18
+ a\=>a => b\=>b
19
+ a\,a => b\,b
20
+ fooaaa,baraaa,bazaaa
21
+
22
+ # Some synonym groups specific to this example
23
+ GB,gib,gigabyte,gigabytes
24
+ MB,mib,megabyte,megabytes
25
+ Television, Televisions, TV, TVs
26
+ #notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
27
+ #after us won't split it into two words.
28
+
29
+ # Synonym mappings can be used for spelling correction too
30
+ pixima => pixma
31
+