hydra-works 0.16.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +59 -0
  3. data/.gitignore +3 -1
  4. data/.rubocop.yml +2 -6
  5. data/.rubocop_todo.yml +5 -0
  6. data/.solr_wrapper +1 -0
  7. data/CHANGELOG.md +621 -0
  8. data/CODE_OF_CONDUCT.md +36 -0
  9. data/CONTRIBUTING.md +23 -21
  10. data/Gemfile +11 -3
  11. data/LICENSE +14 -16
  12. data/README.md +54 -12
  13. data/SUPPORT.md +5 -0
  14. data/hydra-works.gemspec +15 -13
  15. data/lib/hydra/works/characterization.rb +5 -4
  16. data/lib/hydra/works/characterization/fits_document.rb +348 -144
  17. data/lib/hydra/works/characterization/schema/audio_schema.rb +2 -0
  18. data/lib/hydra/works/characterization/schema/video_schema.rb +2 -0
  19. data/lib/hydra/works/models/concerns/collection_behavior.rb +44 -0
  20. data/lib/hydra/works/models/concerns/file_set_behavior.rb +20 -0
  21. data/lib/hydra/works/models/concerns/work_behavior.rb +54 -0
  22. data/lib/hydra/works/services/add_external_file_to_file_set.rb +1 -1
  23. data/lib/hydra/works/services/add_file_to_file_set.rb +1 -1
  24. data/lib/hydra/works/services/characterization_service.rb +5 -0
  25. data/lib/hydra/works/services/determine_original_name.rb +1 -1
  26. data/lib/hydra/works/version.rb +1 -1
  27. data/lib/hydra/works/virus_scanner.rb +18 -2
  28. data/spec/fixtures/fits_0.8.5_tiff.xml +78 -0
  29. data/spec/fixtures/fits_1.2.0_avi.xml +83 -0
  30. data/spec/fixtures/fits_1.2.0_jpg.xml +76 -0
  31. data/spec/fixtures/fits_1.2.0_mp3.xml +51 -0
  32. data/spec/fixtures/fits_1.2.0_mp4.xml +88 -0
  33. data/spec/fixtures/fits_netcdf_two_mimetypes.xml +35 -0
  34. data/spec/hydra/works/characterization_spec.rb +12 -5
  35. data/spec/hydra/works/models/collection_spec.rb +162 -0
  36. data/spec/hydra/works/models/concerns/file_set/contained_files_spec.rb +3 -16
  37. data/spec/hydra/works/models/file_set_spec.rb +47 -0
  38. data/spec/hydra/works/models/work_spec.rb +213 -7
  39. data/spec/hydra/works/services/characterization_service_spec.rb +90 -27
  40. data/spec/hydra/works/services/persist_derivatives_spec.rb +6 -6
  41. data/spec/hydra/works/virus_scanner_spec.rb +31 -0
  42. data/use-cases/princeton_book_use_case.md +1 -1
  43. metadata +77 -67
  44. data/.travis.yml +0 -15
  45. data/lib/hydra/works/characterization/fits_mapper.rb +0 -0
  46. data/solr/config/_rest_managed.json +0 -3
  47. data/solr/config/admin-extra.html +0 -31
  48. data/solr/config/elevate.xml +0 -36
  49. data/solr/config/mapping-ISOLatin1Accent.txt +0 -246
  50. data/solr/config/protwords.txt +0 -21
  51. data/solr/config/schema.xml +0 -372
  52. data/solr/config/scripts.conf +0 -24
  53. data/solr/config/solrconfig.xml +0 -419
  54. data/solr/config/spellings.txt +0 -2
  55. data/solr/config/stopwords.txt +0 -58
  56. data/solr/config/stopwords_en.txt +0 -58
  57. data/solr/config/synonyms.txt +0 -31
  58. data/solr/config/xslt/example.xsl +0 -132
  59. data/solr/config/xslt/example_atom.xsl +0 -67
  60. data/solr/config/xslt/example_rss.xsl +0 -66
  61. data/solr/config/xslt/luke.xsl +0 -337
  62. data/spec/fixtures/eicar.txt +0 -1
@@ -1,24 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one or more
2
- # contributor license agreements. See the NOTICE file distributed with
3
- # this work for additional information regarding copyright ownership.
4
- # The ASF licenses this file to You under the Apache License, Version 2.0
5
- # (the "License"); you may not use this file except in compliance with
6
- # the License. You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- user=
17
- solr_hostname=localhost
18
- solr_port=8983
19
- rsyncd_port=18983
20
- data_dir=
21
- webapp_name=solr
22
- master_host=
23
- master_data_dir=
24
- master_status_dir=
@@ -1,419 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8" ?>
2
- <!--
3
- Licensed to the Apache Software Foundation (ASF) under one or more
4
- contributor license agreements. See the NOTICE file distributed with
5
- this work for additional information regarding copyright ownership.
6
- The ASF licenses this file to You under the Apache License, Version 2.0
7
- (the "License"); you may not use this file except in compliance with
8
- the License. You may obtain a copy of the License at
9
-
10
- http://www.apache.org/licenses/LICENSE-2.0
11
-
12
- Unless required by applicable law or agreed to in writing, software
13
- distributed under the License is distributed on an "AS IS" BASIS,
14
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- See the License for the specific language governing permissions and
16
- limitations under the License.
17
- -->
18
-
19
- <!--
20
- This is a stripped down config file used for a simple example...
21
- It is *not* a good example to work from.
22
- -->
23
- <config>
24
-
25
- <!-- Controls what version of Lucene various components of Solr
26
- adhere to. Generally, you want to use the latest version to
27
- get all bug fixes and improvements. It is highly recommended
28
- that you fully re-index after changing this setting as it can
29
- affect both how text is indexed and queried.
30
- -->
31
- <luceneMatchVersion>5.0.0</luceneMatchVersion>
32
-
33
- <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" />
34
- <lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs" />
35
-
36
- <directoryFactory name="DirectoryFactory"
37
- class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
38
- </directoryFactory>
39
-
40
- <codecFactory class="solr.SchemaCodecFactory"/>
41
-
42
- <schemaFactory class="ClassicIndexSchemaFactory"/>
43
-
44
-
45
- <dataDir>${solr.blacklight-core.data.dir:}</dataDir>
46
-
47
- <requestDispatcher handleSelect="true" >
48
- <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
49
- </requestDispatcher>
50
-
51
- <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
52
-
53
- <!-- config for the admin interface -->
54
- <admin>
55
- <defaultQuery>*:*</defaultQuery>
56
- </admin>
57
-
58
- <!-- SearchHandler
59
-
60
- http://wiki.apache.org/solr/SearchHandler
61
-
62
- For processing Search Queries, the primary Request Handler
63
- provided with Solr is "SearchHandler" It delegates to a sequent
64
- of SearchComponents (see below) and supports distributed
65
- queries across multiple shards
66
- -->
67
- <requestHandler name="search" class="solr.SearchHandler" default="true">
68
- <!-- default values for query parameters can be specified, these
69
- will be overridden by parameters in the request
70
- -->
71
- <lst name="defaults">
72
- <str name="defType">dismax</str>
73
- <str name="echoParams">explicit</str>
74
- <int name="rows">10</int>
75
-
76
- <str name="q.alt">*:*</str>
77
- <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
78
-
79
- <!-- this qf and pf are used by default, if not otherwise specified by
80
- client. The default blacklight_config will use these for the
81
- "keywords" search. See the author_qf/author_pf, title_qf, etc
82
- below, which the default blacklight_config will specify for
83
- those searches. You may also be interested in:
84
- http://wiki.apache.org/solr/LocalParams
85
- -->
86
-
87
- <str name="qf">
88
- title_unstem_search^100000
89
- subtitle_unstem_search^50000
90
- title_t^25000
91
- subtitle_t^10000
92
- title_addl_unstem_search^5000
93
- title_addl_t^2500
94
- title_added_entry_unstem_search^1500
95
- title_added_entry_t^1250
96
- subject_topic_unstem_search^1000
97
- subject_unstem_search^750
98
- subject_topic_facet^625
99
- subject_t^500
100
- author_unstem_search^250
101
- author_addl_unstem_search^250
102
- author_t^100
103
- author_addl_t^50
104
- subject_addl_unstem_search^250
105
- subject_addl_t^50
106
- title_series_unstem_search^25
107
- title_series_t^10
108
- isbn_t
109
- text
110
- </str>
111
- <str name="pf">
112
- title_unstem_search^1000000
113
- subtitle_unstem_search^500000
114
- title_t^250000
115
- subtitle_t^100000
116
- title_addl_unstem_search^50000
117
- title_addl_t^25000
118
- title_added_entry_unstem_search^15000
119
- title_added_entry_t^12500
120
- subject_topic_unstem_search^10000
121
- subject_unstem_search^7500
122
- subject_topic_facet^6250
123
- subject_t^5000
124
- author_unstem_search^2500
125
- author_addl_unstem_search^2500
126
- author_t^1000
127
- author_addl_t^500
128
- subject_addl_unstem_search^2500
129
- subject_addl_t^500
130
- title_series_unstem_search^250
131
- title_series_t^100
132
- text^10
133
- </str>
134
- <str name="author_qf">
135
- author_unstem_search^200
136
- author_addl_unstem_search^50
137
- author_t^20
138
- author_addl_t
139
- </str>
140
- <str name="author_pf">
141
- author_unstem_search^2000
142
- author_addl_unstem_search^500
143
- author_t^200
144
- author_addl_t^10
145
- </str>
146
- <str name="title_qf">
147
- title_unstem_search^50000
148
- subtitle_unstem_search^25000
149
- title_addl_unstem_search^10000
150
- title_t^5000
151
- subtitle_t^2500
152
- title_addl_t^100
153
- title_added_entry_unstem_search^50
154
- title_added_entry_t^10
155
- title_series_unstem_search^5
156
- title_series_t
157
- </str>
158
- <str name="title_pf">
159
- title_unstem_search^500000
160
- subtitle_unstem_search^250000
161
- title_addl_unstem_search^100000
162
- title_t^50000
163
- subtitle_t^25000
164
- title_addl_t^1000
165
- title_added_entry_unstem_search^500
166
- title_added_entry_t^100
167
- title_series_t^50
168
- title_series_unstem_search^10
169
- </str>
170
- <str name="subject_qf">
171
- subject_topic_unstem_search^200
172
- subject_unstem_search^125
173
- subject_topic_facet^100
174
- subject_t^50
175
- subject_addl_unstem_search^10
176
- subject_addl_t
177
- </str>
178
- <str name="subject_pf">
179
- subject_topic_unstem_search^2000
180
- subject_unstem_search^1250
181
- subject_t^1000
182
- subject_topic_facet^500
183
- subject_addl_unstem_search^100
184
- subject_addl_t^10
185
- </str>
186
-
187
- <int name="ps">3</int>
188
- <float name="tie">0.01</float>
189
-
190
- <!-- NOT using marc_display because it is large and will slow things down for search results -->
191
- <str name="fl">
192
- id,
193
- score,
194
- author_display,
195
- author_vern_display,
196
- format,
197
- isbn_t,
198
- language_facet,
199
- lc_callnum_display,
200
- material_type_display,
201
- published_display,
202
- published_vern_display,
203
- pub_date,
204
- title_display,
205
- title_vern_display,
206
- subject_topic_facet,
207
- subject_geo_facet,
208
- subject_era_facet,
209
- subtitle_display,
210
- subtitle_vern_display,
211
- url_fulltext_display,
212
- url_suppl_display,
213
- </str>
214
-
215
- <str name="facet">true</str>
216
- <str name="facet.mincount">1</str>
217
- <str name="facet.limit">10</str>
218
- <str name="facet.field">format</str>
219
- <str name="facet.field">lc_1letter_facet</str>
220
- <str name="facet.field">lc_alpha_facet</str>
221
- <str name="facet.field">lc_b4cutter_facet</str>
222
- <str name="facet.field">language_facet</str>
223
- <str name="facet.field">pub_date</str>
224
- <str name="facet.field">subject_era_facet</str>
225
- <str name="facet.field">subject_geo_facet</str>
226
- <str name="facet.field">subject_topic_facet</str>
227
-
228
- <str name="spellcheck">true</str>
229
- <str name="spellcheck.dictionary">default</str>
230
- <str name="spellcheck.onlyMorePopular">true</str>
231
- <str name="spellcheck.extendedResults">true</str>
232
- <str name="spellcheck.collate">false</str>
233
- <str name="spellcheck.count">5</str>
234
-
235
- </lst>
236
- <!-- In addition to defaults, "appends" params can be specified
237
- to identify values which should be appended to the list of
238
- multi-val params from the query (or the existing "defaults").
239
- -->
240
- <!-- In this example, the param "fq=instock:true" would be appended to
241
- any query time fq params the user may specify, as a mechanism for
242
- partitioning the index, independent of any user selected filtering
243
- that may also be desired (perhaps as a result of faceted searching).
244
-
245
- NOTE: there is *absolutely* nothing a client can do to prevent these
246
- "appends" values from being used, so don't use this mechanism
247
- unless you are sure you always want it.
248
- -->
249
- <!--
250
- <lst name="appends">
251
- <str name="fq">inStock:true</str>
252
- </lst>
253
- -->
254
- <!-- "invariants" are a way of letting the Solr maintainer lock down
255
- the options available to Solr clients. Any params values
256
- specified here are used regardless of what values may be specified
257
- in either the query, the "defaults", or the "appends" params.
258
-
259
- In this example, the facet.field and facet.query params would
260
- be fixed, limiting the facets clients can use. Faceting is
261
- not turned on by default - but if the client does specify
262
- facet=true in the request, these are the only facets they
263
- will be able to see counts for; regardless of what other
264
- facet.field or facet.query params they may specify.
265
-
266
- NOTE: there is *absolutely* nothing a client can do to prevent these
267
- "invariants" values from being used, so don't use this mechanism
268
- unless you are sure you always want it.
269
- -->
270
- <!--
271
- <lst name="invariants">
272
- <str name="facet.field">cat</str>
273
- <str name="facet.field">manu_exact</str>
274
- <str name="facet.query">price:[* TO 500]</str>
275
- <str name="facet.query">price:[500 TO *]</str>
276
- </lst>
277
- -->
278
- <!-- If the default list of SearchComponents is not desired, that
279
- list can either be overridden completely, or components can be
280
- prepended or appended to the default list. (see below)
281
- -->
282
- <!--
283
- <arr name="components">
284
- <str>nameOfCustomComponent1</str>
285
- <str>nameOfCustomComponent2</str>
286
- </arr>
287
- -->
288
- <arr name="last-components">
289
- <str>spellcheck</str>
290
- </arr>
291
-
292
- </requestHandler>
293
-
294
- <requestHandler name="standard" class="solr.SearchHandler">
295
- <lst name="defaults">
296
- <str name="echoParams">explicit</str>
297
- <str name="defType">lucene</str>
298
- </lst>
299
- </requestHandler>
300
-
301
- <!-- for requests to get a single document; use id=666 instead of q=id:666 -->
302
- <requestHandler name="document" class="solr.SearchHandler" >
303
- <lst name="defaults">
304
- <str name="echoParams">all</str>
305
- <str name="fl">*</str>
306
- <str name="rows">1</str>
307
- <str name="q">{!term f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
308
- </lst>
309
- </requestHandler>
310
-
311
- <!-- Spell Check
312
-
313
- The spell check component can return a list of alternative spelling
314
- suggestions.
315
-
316
- http://wiki.apache.org/solr/SpellCheckComponent
317
- -->
318
- <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
319
-
320
- <str name="queryAnalyzerFieldType">textSpell</str>
321
-
322
- <!-- Multiple "Spell Checkers" can be declared and used by this
323
- component
324
- -->
325
-
326
- <!-- a spellchecker built from a field of the main index, and
327
- written to disk
328
- -->
329
- <lst name="spellchecker">
330
- <str name="name">default</str>
331
- <str name="field">spell</str>
332
- <str name="spellcheckIndexDir">./spell</str>
333
- <str name="buildOnOptimize">true</str>
334
- </lst>
335
- <lst name="spellchecker">
336
- <str name="name">author</str>
337
- <str name="field">author_spell</str>
338
- <str name="spellcheckIndexDir">./spell_author</str>
339
- <str name="accuracy">0.7</str>
340
- <str name="buildOnOptimize">true</str>
341
- </lst>
342
- <lst name="spellchecker">
343
- <str name="name">subject</str>
344
- <str name="field">subject_spell</str>
345
- <str name="spellcheckIndexDir">./spell_subject</str>
346
- <str name="accuracy">0.7</str>
347
- <str name="buildOnOptimize">true</str>
348
- </lst>
349
- <lst name="spellchecker">
350
- <str name="name">title</str>
351
- <str name="field">title_spell</str>
352
- <str name="spellcheckIndexDir">./spell_title</str>
353
- <str name="accuracy">0.7</str>
354
- <str name="buildOnOptimize">true</str>
355
- </lst>
356
-
357
- <!-- a spellchecker that uses a different distance measure -->
358
- <!--
359
- <lst name="spellchecker">
360
- <str name="name">jarowinkler</str>
361
- <str name="field">spell</str>
362
- <str name="distanceMeasure">
363
- org.apache.lucene.search.spell.JaroWinklerDistance
364
- </str>
365
- <str name="spellcheckIndexDir">spellcheckerJaro</str>
366
- </lst>
367
- -->
368
-
369
- <!-- a spellchecker that use an alternate comparator
370
-
371
- comparatorClass be one of:
372
- 1. score (default)
373
- 2. freq (Frequency first, then score)
374
- 3. A fully qualified class name
375
- -->
376
- <!--
377
- <lst name="spellchecker">
378
- <str name="name">freq</str>
379
- <str name="field">lowerfilt</str>
380
- <str name="spellcheckIndexDir">spellcheckerFreq</str>
381
- <str name="comparatorClass">freq</str>
382
- <str name="buildOnCommit">true</str>
383
- -->
384
-
385
- <!-- A spellchecker that reads the list of words from a file -->
386
- <!--
387
- <lst name="spellchecker">
388
- <str name="classname">solr.FileBasedSpellChecker</str>
389
- <str name="name">file</str>
390
- <str name="sourceLocation">spellings.txt</str>
391
- <str name="characterEncoding">UTF-8</str>
392
- <str name="spellcheckIndexDir">spellcheckerFile</str>
393
- </lst>
394
- -->
395
- </searchComponent>
396
-
397
- <searchComponent name="suggest" class="solr.SuggestComponent">
398
- <lst name="suggester">
399
- <str name="name">mySuggester</str>
400
- <str name="lookupImpl">FuzzyLookupFactory</str>
401
- <str name="suggestAnalyzerFieldType">textSuggest</str>
402
- <str name="buildOnCommit">true</str>
403
- <str name="field">suggest</str>
404
- </lst>
405
- </searchComponent>
406
-
407
- <requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
408
- <lst name="defaults">
409
- <str name="suggest">true</str>
410
- <str name="suggest.count">5</str>
411
- <str name="suggest.dictionary">mySuggester</str>
412
- </lst>
413
- <arr name="components">
414
- <str>suggest</str>
415
- </arr>
416
- </requestHandler>
417
-
418
- </config>
419
-
@@ -1,2 +0,0 @@
1
- pizza
2
- history
@@ -1,58 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one or more
2
- # contributor license agreements. See the NOTICE file distributed with
3
- # this work for additional information regarding copyright ownership.
4
- # The ASF licenses this file to You under the Apache License, Version 2.0
5
- # (the "License"); you may not use this file except in compliance with
6
- # the License. You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- #-----------------------------------------------------------------------
17
- # a couple of test stopwords to test that the words are really being
18
- # configured from this file:
19
- stopworda
20
- stopwordb
21
-
22
- #Standard english stop words taken from Lucene's StopAnalyzer
23
- a
24
- an
25
- and
26
- are
27
- as
28
- at
29
- be
30
- but
31
- by
32
- for
33
- if
34
- in
35
- into
36
- is
37
- it
38
- no
39
- not
40
- of
41
- on
42
- or
43
- s
44
- such
45
- t
46
- that
47
- the
48
- their
49
- then
50
- there
51
- these
52
- they
53
- this
54
- to
55
- was
56
- will
57
- with
58
-