hydra-works 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/.rubocop.yml +0 -1
- data/README.md +9 -9
- data/Rakefile +12 -12
- data/hydra-works.gemspec +3 -3
- data/lib/hydra/works/models/characterization/fits_datastream.rb +1 -1
- data/lib/hydra/works/models/work.rb +4 -1
- data/lib/hydra/works/services/add_file_to_file_set.rb +5 -5
- data/lib/hydra/works/services/characterization_service.rb +2 -2
- data/lib/hydra/works/version.rb +1 -1
- data/lib/hydra/works.rb +1 -1
- data/solr/config/_rest_managed.json +3 -0
- data/solr/config/admin-extra.html +31 -0
- data/solr/config/elevate.xml +36 -0
- data/solr/config/mapping-ISOLatin1Accent.txt +246 -0
- data/solr/config/protwords.txt +21 -0
- data/solr/config/schema.xml +372 -0
- data/solr/config/scripts.conf +24 -0
- data/solr/config/solrconfig.xml +419 -0
- data/solr/config/spellings.txt +2 -0
- data/solr/config/stopwords.txt +58 -0
- data/solr/config/stopwords_en.txt +58 -0
- data/solr/config/synonyms.txt +31 -0
- data/solr/config/xslt/example.xsl +132 -0
- data/solr/config/xslt/example_atom.xsl +67 -0
- data/solr/config/xslt/example_rss.xsl +66 -0
- data/solr/config/xslt/luke.xsl +337 -0
- data/spec/hydra/works/models/collection_spec.rb +17 -17
- data/spec/hydra/works/models/file_set_spec.rb +8 -8
- data/spec/hydra/works/models/{generic_work_spec.rb → work_spec.rb} +57 -57
- data/spec/hydra/works/services/add_file_to_file_set_spec.rb +23 -23
- data/spec/hydra/works/services/upload_file_spec.rb +22 -22
- data/spec/hydra/works_spec.rb +18 -18
- data/spec/spec_helper.rb +3 -2
- metadata +36 -24
- data/config/solrconfig.xml +0 -223
- data/lib/hydra/works/models/generic_work.rb +0 -9
- data/lib/tasks/hydra-works_tasks.rake +0 -89
- data/lib/tasks/jetty.rake +0 -15
@@ -0,0 +1,419 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" ?>
|
2
|
+
<!--
|
3
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
+
contributor license agreements. See the NOTICE file distributed with
|
5
|
+
this work for additional information regarding copyright ownership.
|
6
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
+
(the "License"); you may not use this file except in compliance with
|
8
|
+
the License. You may obtain a copy of the License at
|
9
|
+
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
See the License for the specific language governing permissions and
|
16
|
+
limitations under the License.
|
17
|
+
-->
|
18
|
+
|
19
|
+
<!--
|
20
|
+
This is a stripped down config file used for a simple example...
|
21
|
+
It is *not* a good example to work from.
|
22
|
+
-->
|
23
|
+
<config>
|
24
|
+
|
25
|
+
<!-- Controls what version of Lucene various components of Solr
|
26
|
+
adhere to. Generally, you want to use the latest version to
|
27
|
+
get all bug fixes and improvements. It is highly recommended
|
28
|
+
that you fully re-index after changing this setting as it can
|
29
|
+
affect both how text is indexed and queried.
|
30
|
+
-->
|
31
|
+
<luceneMatchVersion>5.0.0</luceneMatchVersion>
|
32
|
+
|
33
|
+
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" />
|
34
|
+
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs" />
|
35
|
+
|
36
|
+
<directoryFactory name="DirectoryFactory"
|
37
|
+
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
|
38
|
+
</directoryFactory>
|
39
|
+
|
40
|
+
<codecFactory class="solr.SchemaCodecFactory"/>
|
41
|
+
|
42
|
+
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
43
|
+
|
44
|
+
|
45
|
+
<dataDir>${solr.blacklight-core.data.dir:}</dataDir>
|
46
|
+
|
47
|
+
<requestDispatcher handleSelect="true" >
|
48
|
+
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
|
49
|
+
</requestDispatcher>
|
50
|
+
|
51
|
+
<requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
|
52
|
+
|
53
|
+
<!-- config for the admin interface -->
|
54
|
+
<admin>
|
55
|
+
<defaultQuery>*:*</defaultQuery>
|
56
|
+
</admin>
|
57
|
+
|
58
|
+
<!-- SearchHandler
|
59
|
+
|
60
|
+
http://wiki.apache.org/solr/SearchHandler
|
61
|
+
|
62
|
+
For processing Search Queries, the primary Request Handler
|
63
|
+
provided with Solr is "SearchHandler". It delegates to a sequence
|
64
|
+
of SearchComponents (see below) and supports distributed
|
65
|
+
queries across multiple shards
|
66
|
+
-->
|
67
|
+
<requestHandler name="search" class="solr.SearchHandler" default="true">
|
68
|
+
<!-- default values for query parameters can be specified, these
|
69
|
+
will be overridden by parameters in the request
|
70
|
+
-->
|
71
|
+
<lst name="defaults">
|
72
|
+
<str name="defType">dismax</str>
|
73
|
+
<str name="echoParams">explicit</str>
|
74
|
+
<int name="rows">10</int>
|
75
|
+
|
76
|
+
<str name="q.alt">*:*</str>
|
77
|
+
<str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
|
78
|
+
|
79
|
+
<!-- this qf and pf are used by default, if not otherwise specified by
|
80
|
+
client. The default blacklight_config will use these for the
|
81
|
+
"keywords" search. See the author_qf/author_pf, title_qf, etc
|
82
|
+
below, which the default blacklight_config will specify for
|
83
|
+
those searches. You may also be interested in:
|
84
|
+
http://wiki.apache.org/solr/LocalParams
|
85
|
+
-->
|
86
|
+
|
87
|
+
<str name="qf">
|
88
|
+
title_unstem_search^100000
|
89
|
+
subtitle_unstem_search^50000
|
90
|
+
title_t^25000
|
91
|
+
subtitle_t^10000
|
92
|
+
title_addl_unstem_search^5000
|
93
|
+
title_addl_t^2500
|
94
|
+
title_added_entry_unstem_search^1500
|
95
|
+
title_added_entry_t^1250
|
96
|
+
subject_topic_unstem_search^1000
|
97
|
+
subject_unstem_search^750
|
98
|
+
subject_topic_facet^625
|
99
|
+
subject_t^500
|
100
|
+
author_unstem_search^250
|
101
|
+
author_addl_unstem_search^250
|
102
|
+
author_t^100
|
103
|
+
author_addl_t^50
|
104
|
+
subject_addl_unstem_search^250
|
105
|
+
subject_addl_t^50
|
106
|
+
title_series_unstem_search^25
|
107
|
+
title_series_t^10
|
108
|
+
isbn_t
|
109
|
+
text
|
110
|
+
</str>
|
111
|
+
<str name="pf">
|
112
|
+
title_unstem_search^1000000
|
113
|
+
subtitle_unstem_search^500000
|
114
|
+
title_t^250000
|
115
|
+
subtitle_t^100000
|
116
|
+
title_addl_unstem_search^50000
|
117
|
+
title_addl_t^25000
|
118
|
+
title_added_entry_unstem_search^15000
|
119
|
+
title_added_entry_t^12500
|
120
|
+
subject_topic_unstem_search^10000
|
121
|
+
subject_unstem_search^7500
|
122
|
+
subject_topic_facet^6250
|
123
|
+
subject_t^5000
|
124
|
+
author_unstem_search^2500
|
125
|
+
author_addl_unstem_search^2500
|
126
|
+
author_t^1000
|
127
|
+
author_addl_t^500
|
128
|
+
subject_addl_unstem_search^2500
|
129
|
+
subject_addl_t^500
|
130
|
+
title_series_unstem_search^250
|
131
|
+
title_series_t^100
|
132
|
+
text^10
|
133
|
+
</str>
|
134
|
+
<str name="author_qf">
|
135
|
+
author_unstem_search^200
|
136
|
+
author_addl_unstem_search^50
|
137
|
+
author_t^20
|
138
|
+
author_addl_t
|
139
|
+
</str>
|
140
|
+
<str name="author_pf">
|
141
|
+
author_unstem_search^2000
|
142
|
+
author_addl_unstem_search^500
|
143
|
+
author_t^200
|
144
|
+
author_addl_t^10
|
145
|
+
</str>
|
146
|
+
<str name="title_qf">
|
147
|
+
title_unstem_search^50000
|
148
|
+
subtitle_unstem_search^25000
|
149
|
+
title_addl_unstem_search^10000
|
150
|
+
title_t^5000
|
151
|
+
subtitle_t^2500
|
152
|
+
title_addl_t^100
|
153
|
+
title_added_entry_unstem_search^50
|
154
|
+
title_added_entry_t^10
|
155
|
+
title_series_unstem_search^5
|
156
|
+
title_series_t
|
157
|
+
</str>
|
158
|
+
<str name="title_pf">
|
159
|
+
title_unstem_search^500000
|
160
|
+
subtitle_unstem_search^250000
|
161
|
+
title_addl_unstem_search^100000
|
162
|
+
title_t^50000
|
163
|
+
subtitle_t^25000
|
164
|
+
title_addl_t^1000
|
165
|
+
title_added_entry_unstem_search^500
|
166
|
+
title_added_entry_t^100
|
167
|
+
title_series_t^50
|
168
|
+
title_series_unstem_search^10
|
169
|
+
</str>
|
170
|
+
<str name="subject_qf">
|
171
|
+
subject_topic_unstem_search^200
|
172
|
+
subject_unstem_search^125
|
173
|
+
subject_topic_facet^100
|
174
|
+
subject_t^50
|
175
|
+
subject_addl_unstem_search^10
|
176
|
+
subject_addl_t
|
177
|
+
</str>
|
178
|
+
<str name="subject_pf">
|
179
|
+
subject_topic_unstem_search^2000
|
180
|
+
subject_unstem_search^1250
|
181
|
+
subject_t^1000
|
182
|
+
subject_topic_facet^500
|
183
|
+
subject_addl_unstem_search^100
|
184
|
+
subject_addl_t^10
|
185
|
+
</str>
|
186
|
+
|
187
|
+
<int name="ps">3</int>
|
188
|
+
<float name="tie">0.01</float>
|
189
|
+
|
190
|
+
<!-- NOT using marc_display because it is large and will slow things down for search results -->
|
191
|
+
<str name="fl">
|
192
|
+
id,
|
193
|
+
score,
|
194
|
+
author_display,
|
195
|
+
author_vern_display,
|
196
|
+
format,
|
197
|
+
isbn_t,
|
198
|
+
language_facet,
|
199
|
+
lc_callnum_display,
|
200
|
+
material_type_display,
|
201
|
+
published_display,
|
202
|
+
published_vern_display,
|
203
|
+
pub_date,
|
204
|
+
title_display,
|
205
|
+
title_vern_display,
|
206
|
+
subject_topic_facet,
|
207
|
+
subject_geo_facet,
|
208
|
+
subject_era_facet,
|
209
|
+
subtitle_display,
|
210
|
+
subtitle_vern_display,
|
211
|
+
url_fulltext_display,
|
212
|
+
url_suppl_display,
|
213
|
+
</str>
|
214
|
+
|
215
|
+
<str name="facet">true</str>
|
216
|
+
<str name="facet.mincount">1</str>
|
217
|
+
<str name="facet.limit">10</str>
|
218
|
+
<str name="facet.field">format</str>
|
219
|
+
<str name="facet.field">lc_1letter_facet</str>
|
220
|
+
<str name="facet.field">lc_alpha_facet</str>
|
221
|
+
<str name="facet.field">lc_b4cutter_facet</str>
|
222
|
+
<str name="facet.field">language_facet</str>
|
223
|
+
<str name="facet.field">pub_date</str>
|
224
|
+
<str name="facet.field">subject_era_facet</str>
|
225
|
+
<str name="facet.field">subject_geo_facet</str>
|
226
|
+
<str name="facet.field">subject_topic_facet</str>
|
227
|
+
|
228
|
+
<str name="spellcheck">true</str>
|
229
|
+
<str name="spellcheck.dictionary">default</str>
|
230
|
+
<str name="spellcheck.onlyMorePopular">true</str>
|
231
|
+
<str name="spellcheck.extendedResults">true</str>
|
232
|
+
<str name="spellcheck.collate">false</str>
|
233
|
+
<str name="spellcheck.count">5</str>
|
234
|
+
|
235
|
+
</lst>
|
236
|
+
<!-- In addition to defaults, "appends" params can be specified
|
237
|
+
to identify values which should be appended to the list of
|
238
|
+
multi-val params from the query (or the existing "defaults").
|
239
|
+
-->
|
240
|
+
<!-- In this example, the param "fq=instock:true" would be appended to
|
241
|
+
any query time fq params the user may specify, as a mechanism for
|
242
|
+
partitioning the index, independent of any user selected filtering
|
243
|
+
that may also be desired (perhaps as a result of faceted searching).
|
244
|
+
|
245
|
+
NOTE: there is *absolutely* nothing a client can do to prevent these
|
246
|
+
"appends" values from being used, so don't use this mechanism
|
247
|
+
unless you are sure you always want it.
|
248
|
+
-->
|
249
|
+
<!--
|
250
|
+
<lst name="appends">
|
251
|
+
<str name="fq">inStock:true</str>
|
252
|
+
</lst>
|
253
|
+
-->
|
254
|
+
<!-- "invariants" are a way of letting the Solr maintainer lock down
|
255
|
+
the options available to Solr clients. Any params values
|
256
|
+
specified here are used regardless of what values may be specified
|
257
|
+
in either the query, the "defaults", or the "appends" params.
|
258
|
+
|
259
|
+
In this example, the facet.field and facet.query params would
|
260
|
+
be fixed, limiting the facets clients can use. Faceting is
|
261
|
+
not turned on by default - but if the client does specify
|
262
|
+
facet=true in the request, these are the only facets they
|
263
|
+
will be able to see counts for; regardless of what other
|
264
|
+
facet.field or facet.query params they may specify.
|
265
|
+
|
266
|
+
NOTE: there is *absolutely* nothing a client can do to prevent these
|
267
|
+
"invariants" values from being used, so don't use this mechanism
|
268
|
+
unless you are sure you always want it.
|
269
|
+
-->
|
270
|
+
<!--
|
271
|
+
<lst name="invariants">
|
272
|
+
<str name="facet.field">cat</str>
|
273
|
+
<str name="facet.field">manu_exact</str>
|
274
|
+
<str name="facet.query">price:[* TO 500]</str>
|
275
|
+
<str name="facet.query">price:[500 TO *]</str>
|
276
|
+
</lst>
|
277
|
+
-->
|
278
|
+
<!-- If the default list of SearchComponents is not desired, that
|
279
|
+
list can either be overridden completely, or components can be
|
280
|
+
prepended or appended to the default list. (see below)
|
281
|
+
-->
|
282
|
+
<!--
|
283
|
+
<arr name="components">
|
284
|
+
<str>nameOfCustomComponent1</str>
|
285
|
+
<str>nameOfCustomComponent2</str>
|
286
|
+
</arr>
|
287
|
+
-->
|
288
|
+
<arr name="last-components">
|
289
|
+
<str>spellcheck</str>
|
290
|
+
</arr>
|
291
|
+
|
292
|
+
</requestHandler>
|
293
|
+
|
294
|
+
<requestHandler name="standard" class="solr.SearchHandler">
|
295
|
+
<lst name="defaults">
|
296
|
+
<str name="echoParams">explicit</str>
|
297
|
+
<str name="defType">lucene</str>
|
298
|
+
</lst>
|
299
|
+
</requestHandler>
|
300
|
+
|
301
|
+
<!-- for requests to get a single document; use id=666 instead of q=id:666 -->
|
302
|
+
<requestHandler name="document" class="solr.SearchHandler" >
|
303
|
+
<lst name="defaults">
|
304
|
+
<str name="echoParams">all</str>
|
305
|
+
<str name="fl">*</str>
|
306
|
+
<str name="rows">1</str>
|
307
|
+
<str name="q">{!term f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
|
308
|
+
</lst>
|
309
|
+
</requestHandler>
|
310
|
+
|
311
|
+
<!-- Spell Check
|
312
|
+
|
313
|
+
The spell check component can return a list of alternative spelling
|
314
|
+
suggestions.
|
315
|
+
|
316
|
+
http://wiki.apache.org/solr/SpellCheckComponent
|
317
|
+
-->
|
318
|
+
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
|
319
|
+
|
320
|
+
<str name="queryAnalyzerFieldType">textSpell</str>
|
321
|
+
|
322
|
+
<!-- Multiple "Spell Checkers" can be declared and used by this
|
323
|
+
component
|
324
|
+
-->
|
325
|
+
|
326
|
+
<!-- a spellchecker built from a field of the main index, and
|
327
|
+
written to disk
|
328
|
+
-->
|
329
|
+
<lst name="spellchecker">
|
330
|
+
<str name="name">default</str>
|
331
|
+
<str name="field">spell</str>
|
332
|
+
<str name="spellcheckIndexDir">./spell</str>
|
333
|
+
<str name="buildOnOptimize">true</str>
|
334
|
+
</lst>
|
335
|
+
<lst name="spellchecker">
|
336
|
+
<str name="name">author</str>
|
337
|
+
<str name="field">author_spell</str>
|
338
|
+
<str name="spellcheckIndexDir">./spell_author</str>
|
339
|
+
<str name="accuracy">0.7</str>
|
340
|
+
<str name="buildOnOptimize">true</str>
|
341
|
+
</lst>
|
342
|
+
<lst name="spellchecker">
|
343
|
+
<str name="name">subject</str>
|
344
|
+
<str name="field">subject_spell</str>
|
345
|
+
<str name="spellcheckIndexDir">./spell_subject</str>
|
346
|
+
<str name="accuracy">0.7</str>
|
347
|
+
<str name="buildOnOptimize">true</str>
|
348
|
+
</lst>
|
349
|
+
<lst name="spellchecker">
|
350
|
+
<str name="name">title</str>
|
351
|
+
<str name="field">title_spell</str>
|
352
|
+
<str name="spellcheckIndexDir">./spell_title</str>
|
353
|
+
<str name="accuracy">0.7</str>
|
354
|
+
<str name="buildOnOptimize">true</str>
|
355
|
+
</lst>
|
356
|
+
|
357
|
+
<!-- a spellchecker that uses a different distance measure -->
|
358
|
+
<!--
|
359
|
+
<lst name="spellchecker">
|
360
|
+
<str name="name">jarowinkler</str>
|
361
|
+
<str name="field">spell</str>
|
362
|
+
<str name="distanceMeasure">
|
363
|
+
org.apache.lucene.search.spell.JaroWinklerDistance
|
364
|
+
</str>
|
365
|
+
<str name="spellcheckIndexDir">spellcheckerJaro</str>
|
366
|
+
</lst>
|
367
|
+
-->
|
368
|
+
|
369
|
+
<!-- a spellchecker that uses an alternate comparator
|
370
|
+
|
371
|
+
comparatorClass can be one of:
|
372
|
+
1. score (default)
|
373
|
+
2. freq (Frequency first, then score)
|
374
|
+
3. A fully qualified class name
|
375
|
+
-->
|
376
|
+
<!--
|
377
|
+
<lst name="spellchecker">
|
378
|
+
<str name="name">freq</str>
|
379
|
+
<str name="field">lowerfilt</str>
|
380
|
+
<str name="spellcheckIndexDir">spellcheckerFreq</str>
|
381
|
+
<str name="comparatorClass">freq</str>
|
382
|
+
<str name="buildOnCommit">true</str>
</lst>
|
383
|
+
-->
|
384
|
+
|
385
|
+
<!-- A spellchecker that reads the list of words from a file -->
|
386
|
+
<!--
|
387
|
+
<lst name="spellchecker">
|
388
|
+
<str name="classname">solr.FileBasedSpellChecker</str>
|
389
|
+
<str name="name">file</str>
|
390
|
+
<str name="sourceLocation">spellings.txt</str>
|
391
|
+
<str name="characterEncoding">UTF-8</str>
|
392
|
+
<str name="spellcheckIndexDir">spellcheckerFile</str>
|
393
|
+
</lst>
|
394
|
+
-->
|
395
|
+
</searchComponent>
|
396
|
+
|
397
|
+
<searchComponent name="suggest" class="solr.SuggestComponent">
|
398
|
+
<lst name="suggester">
|
399
|
+
<str name="name">mySuggester</str>
|
400
|
+
<str name="lookupImpl">FuzzyLookupFactory</str>
|
401
|
+
<str name="suggestAnalyzerFieldType">textSuggest</str>
|
402
|
+
<str name="buildOnCommit">true</str>
|
403
|
+
<str name="field">suggest</str>
|
404
|
+
</lst>
|
405
|
+
</searchComponent>
|
406
|
+
|
407
|
+
<requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
|
408
|
+
<lst name="defaults">
|
409
|
+
<str name="suggest">true</str>
|
410
|
+
<str name="suggest.count">5</str>
|
411
|
+
<str name="suggest.dictionary">mySuggester</str>
|
412
|
+
</lst>
|
413
|
+
<arr name="components">
|
414
|
+
<str>suggest</str>
|
415
|
+
</arr>
|
416
|
+
</requestHandler>
|
417
|
+
|
418
|
+
</config>
|
419
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
#-----------------------------------------------------------------------
|
17
|
+
# a couple of test stopwords to test that the words are really being
|
18
|
+
# configured from this file:
|
19
|
+
stopworda
|
20
|
+
stopwordb
|
21
|
+
|
22
|
+
#Standard english stop words taken from Lucene's StopAnalyzer
|
23
|
+
a
|
24
|
+
an
|
25
|
+
and
|
26
|
+
are
|
27
|
+
as
|
28
|
+
at
|
29
|
+
be
|
30
|
+
but
|
31
|
+
by
|
32
|
+
for
|
33
|
+
if
|
34
|
+
in
|
35
|
+
into
|
36
|
+
is
|
37
|
+
it
|
38
|
+
no
|
39
|
+
not
|
40
|
+
of
|
41
|
+
on
|
42
|
+
or
|
43
|
+
s
|
44
|
+
such
|
45
|
+
t
|
46
|
+
that
|
47
|
+
the
|
48
|
+
their
|
49
|
+
then
|
50
|
+
there
|
51
|
+
these
|
52
|
+
they
|
53
|
+
this
|
54
|
+
to
|
55
|
+
was
|
56
|
+
will
|
57
|
+
with
|
58
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
2
|
+
# contributor license agreements. See the NOTICE file distributed with
|
3
|
+
# this work for additional information regarding copyright ownership.
|
4
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with
|
6
|
+
# the License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
#-----------------------------------------------------------------------
|
17
|
+
# a couple of test stopwords to test that the words are really being
|
18
|
+
# configured from this file:
|
19
|
+
stopworda
|
20
|
+
stopwordb
|
21
|
+
|
22
|
+
#Standard english stop words taken from Lucene's StopAnalyzer
|
23
|
+
a
|
24
|
+
an
|
25
|
+
and
|
26
|
+
are
|
27
|
+
as
|
28
|
+
at
|
29
|
+
be
|
30
|
+
but
|
31
|
+
by
|
32
|
+
for
|
33
|
+
if
|
34
|
+
in
|
35
|
+
into
|
36
|
+
is
|
37
|
+
it
|
38
|
+
no
|
39
|
+
not
|
40
|
+
of
|
41
|
+
on
|
42
|
+
or
|
43
|
+
s
|
44
|
+
such
|
45
|
+
t
|
46
|
+
that
|
47
|
+
the
|
48
|
+
their
|
49
|
+
then
|
50
|
+
there
|
51
|
+
these
|
52
|
+
they
|
53
|
+
this
|
54
|
+
to
|
55
|
+
was
|
56
|
+
will
|
57
|
+
with
|
58
|
+
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
3
|
+
# the License. You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
#-----------------------------------------------------------------------
|
14
|
+
#some test synonym mappings unlikely to appear in real input text
|
15
|
+
aaa => aaaa
|
16
|
+
bbb => bbbb1 bbbb2
|
17
|
+
ccc => cccc1,cccc2
|
18
|
+
a\=>a => b\=>b
|
19
|
+
a\,a => b\,b
|
20
|
+
fooaaa,baraaa,bazaaa
|
21
|
+
|
22
|
+
# Some synonym groups specific to this example
|
23
|
+
GB,gib,gigabyte,gigabytes
|
24
|
+
MB,mib,megabyte,megabytes
|
25
|
+
Television, Televisions, TV, TVs
|
26
|
+
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
27
|
+
#after us won't split it into two words.
|
28
|
+
|
29
|
+
# Synonym mappings can be used for spelling correction too
|
30
|
+
pixima => pixma
|
31
|
+
|