greglu-solr-ruby 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. data/CHANGES.yml +50 -0
  2. data/LICENSE.txt +201 -0
  3. data/README +56 -0
  4. data/Rakefile +190 -0
  5. data/examples/delicious_library/dl_importer.rb +60 -0
  6. data/examples/delicious_library/sample_export.txt +164 -0
  7. data/examples/marc/marc_importer.rb +106 -0
  8. data/examples/tang/tang_importer.rb +58 -0
  9. data/lib/solr.rb +21 -0
  10. data/lib/solr/connection.rb +179 -0
  11. data/lib/solr/document.rb +73 -0
  12. data/lib/solr/exception.rb +13 -0
  13. data/lib/solr/field.rb +39 -0
  14. data/lib/solr/importer.rb +19 -0
  15. data/lib/solr/importer/array_mapper.rb +26 -0
  16. data/lib/solr/importer/delimited_file_source.rb +38 -0
  17. data/lib/solr/importer/hpricot_mapper.rb +27 -0
  18. data/lib/solr/importer/mapper.rb +51 -0
  19. data/lib/solr/importer/solr_source.rb +43 -0
  20. data/lib/solr/importer/xpath_mapper.rb +35 -0
  21. data/lib/solr/indexer.rb +52 -0
  22. data/lib/solr/request.rb +26 -0
  23. data/lib/solr/request/add_document.rb +63 -0
  24. data/lib/solr/request/base.rb +36 -0
  25. data/lib/solr/request/commit.rb +31 -0
  26. data/lib/solr/request/delete.rb +50 -0
  27. data/lib/solr/request/dismax.rb +46 -0
  28. data/lib/solr/request/index_info.rb +22 -0
  29. data/lib/solr/request/modify_document.rb +51 -0
  30. data/lib/solr/request/optimize.rb +21 -0
  31. data/lib/solr/request/ping.rb +36 -0
  32. data/lib/solr/request/select.rb +56 -0
  33. data/lib/solr/request/spellcheck.rb +30 -0
  34. data/lib/solr/request/standard.rb +374 -0
  35. data/lib/solr/request/update.rb +23 -0
  36. data/lib/solr/response.rb +27 -0
  37. data/lib/solr/response/add_document.rb +17 -0
  38. data/lib/solr/response/base.rb +42 -0
  39. data/lib/solr/response/commit.rb +17 -0
  40. data/lib/solr/response/delete.rb +13 -0
  41. data/lib/solr/response/dismax.rb +20 -0
  42. data/lib/solr/response/index_info.rb +26 -0
  43. data/lib/solr/response/modify_document.rb +17 -0
  44. data/lib/solr/response/optimize.rb +14 -0
  45. data/lib/solr/response/ping.rb +28 -0
  46. data/lib/solr/response/ruby.rb +42 -0
  47. data/lib/solr/response/select.rb +17 -0
  48. data/lib/solr/response/spellcheck.rb +20 -0
  49. data/lib/solr/response/standard.rb +60 -0
  50. data/lib/solr/response/xml.rb +42 -0
  51. data/lib/solr/solrtasks.rb +27 -0
  52. data/lib/solr/util.rb +32 -0
  53. data/lib/solr/xml.rb +47 -0
  54. data/script/setup.rb +14 -0
  55. data/script/solrshell +18 -0
  56. data/solr-ruby.gemspec +26 -0
  57. data/solr/conf/admin-extra.html +31 -0
  58. data/solr/conf/protwords.txt +21 -0
  59. data/solr/conf/schema.xml +221 -0
  60. data/solr/conf/scripts.conf +24 -0
  61. data/solr/conf/solrconfig.xml +394 -0
  62. data/solr/conf/stopwords.txt +58 -0
  63. data/solr/conf/synonyms.txt +31 -0
  64. data/solr/conf/xslt/example.xsl +132 -0
  65. data/test/conf/admin-extra.html +31 -0
  66. data/test/conf/protwords.txt +21 -0
  67. data/test/conf/schema.xml +237 -0
  68. data/test/conf/scripts.conf +24 -0
  69. data/test/conf/solrconfig.xml +376 -0
  70. data/test/conf/stopwords.txt +58 -0
  71. data/test/conf/synonyms.txt +31 -0
  72. data/test/functional/server_test.rb +218 -0
  73. data/test/functional/test_solr_server.rb +104 -0
  74. data/test/unit/add_document_test.rb +40 -0
  75. data/test/unit/array_mapper_test.rb +37 -0
  76. data/test/unit/changes_yaml_test.rb +21 -0
  77. data/test/unit/commit_test.rb +41 -0
  78. data/test/unit/connection_test.rb +55 -0
  79. data/test/unit/data_mapper_test.rb +75 -0
  80. data/test/unit/delete_test.rb +56 -0
  81. data/test/unit/delimited_file_source_test.rb +29 -0
  82. data/test/unit/dismax_request_test.rb +26 -0
  83. data/test/unit/document_test.rb +69 -0
  84. data/test/unit/field_test.rb +48 -0
  85. data/test/unit/hpricot_mapper_test.rb +44 -0
  86. data/test/unit/hpricot_test_file.xml +26 -0
  87. data/test/unit/indexer_test.rb +57 -0
  88. data/test/unit/modify_document_test.rb +24 -0
  89. data/test/unit/ping_test.rb +51 -0
  90. data/test/unit/request_test.rb +61 -0
  91. data/test/unit/response_test.rb +43 -0
  92. data/test/unit/select_test.rb +25 -0
  93. data/test/unit/solr_mock_base.rb +40 -0
  94. data/test/unit/spellcheck_response_test.rb +26 -0
  95. data/test/unit/spellchecker_request_test.rb +27 -0
  96. data/test/unit/standard_request_test.rb +324 -0
  97. data/test/unit/standard_response_test.rb +174 -0
  98. data/test/unit/suite.rb +16 -0
  99. data/test/unit/tab_delimited.txt +2 -0
  100. data/test/unit/util_test.rb +24 -0
  101. data/test/unit/xpath_mapper_test.rb +38 -0
  102. data/test/unit/xpath_test_file.xml +25 -0
  103. metadata +173 -0
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ user=
17
+ solr_hostname=localhost
18
+ solr_port=8983
19
+ rsyncd_port=18983
20
+ data_dir=
21
+ webapp_name=solr
22
+ master_host=
23
+ master_data_dir=
24
+ master_status_dir=
@@ -0,0 +1,376 @@
1
+ <?xml version="1.0" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <config>
20
+ <!-- Set this to 'false' if you want solr to continue working after it has
21
+ encountered a severe configuration error. In a production environment,
22
+ you may want solr to keep working even if one handler is mis-configured.
23
+
24
+ You may also set this to false by setting the system property:
25
+ -Dsolr.abortOnConfigurationError=false
26
+ -->
27
+ <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
28
+
29
+ <!-- Used to specify an alternate directory to hold all index data
30
+ other than the default ./data under the Solr home.
31
+ If replication is in use, this should match the replication configuration. -->
32
+ <!--
33
+ <dataDir>./solr/data</dataDir>
34
+ -->
35
+
36
+ <indexDefaults>
37
+ <!-- Values here affect all index writers and act as a default unless overridden. -->
38
+ <useCompoundFile>false</useCompoundFile>
39
+ <ramBufferSizeMB>32</ramBufferSizeMB>
40
+ <mergeFactor>10</mergeFactor>
41
+ <maxMergeDocs>2147483647</maxMergeDocs>
42
+ <maxFieldLength>10000</maxFieldLength>
43
+ <writeLockTimeout>1000</writeLockTimeout>
44
+ <commitLockTimeout>10000</commitLockTimeout>
45
+ </indexDefaults>
46
+
47
+ <mainIndex>
48
+ <!-- options specific to the main on-disk lucene index -->
49
+ <useCompoundFile>false</useCompoundFile>
50
+ <ramBufferSizeMB>32</ramBufferSizeMB>
51
+ <mergeFactor>10</mergeFactor>
52
+ <maxMergeDocs>2147483647</maxMergeDocs>
53
+ <maxFieldLength>10000</maxFieldLength>
54
+
55
+ <!-- If true, unlock any held write or commit locks on startup.
56
+ This defeats the locking mechanism that allows multiple
57
+ processes to safely access a lucene index, and should be
58
+ used with care. -->
59
+ <unlockOnStartup>false</unlockOnStartup>
60
+ </mainIndex>
61
+
62
+ <!-- the default high-performance update handler -->
63
+ <updateHandler class="solr.DirectUpdateHandler2">
64
+
65
+ <!-- A prefix of "solr." for class names is an alias that
66
+ causes solr to search appropriate packages, including
67
+ org.apache.solr.(search|update|request|core|analysis)
68
+ -->
69
+
70
+ <!-- autocommit pending docs if certain criteria are met
71
+ <autoCommit>
72
+ <maxDocs>10000</maxDocs>
73
+ <maxTime>1000</maxTime>
74
+ </autoCommit>
75
+ -->
76
+
77
+ <!-- The RunExecutableListener executes an external command.
78
+ exe - the name of the executable to run
79
+ dir - dir to use as the current working directory. default="."
80
+ wait - the calling thread waits until the executable returns. default="true"
81
+ args - the arguments to pass to the program. default=nothing
82
+ env - environment variables to set. default=nothing
83
+ -->
84
+ <!-- A postCommit event is fired after every commit or optimize command
85
+ <listener event="postCommit" class="solr.RunExecutableListener">
86
+ <str name="exe">snapshooter</str>
87
+ <str name="dir">solr/bin</str>
88
+ <bool name="wait">true</bool>
89
+ <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
90
+ <arr name="env"> <str>MYVAR=val1</str> </arr>
91
+ </listener>
92
+ -->
93
+ <!-- A postOptimize event is fired only after every optimize command, useful
94
+ in conjunction with index distribution to only distribute optimized indices
95
+ <listener event="postOptimize" class="solr.RunExecutableListener">
96
+ <str name="exe">snapshooter</str>
97
+ <str name="dir">solr/bin</str>
98
+ <bool name="wait">true</bool>
99
+ </listener>
100
+ -->
101
+
102
+ </updateHandler>
103
+
104
+
105
+ <query>
106
+ <!-- Maximum number of clauses in a boolean query... can affect
107
+ range or prefix queries that expand to big boolean
108
+ queries. An exception is thrown if exceeded. -->
109
+ <maxBooleanClauses>1024</maxBooleanClauses>
110
+
111
+
112
+ <!-- Cache used by SolrIndexSearcher for filters (DocSets),
113
+ unordered sets of *all* documents that match a query.
114
+ When a new searcher is opened, its caches may be prepopulated
115
+ or "autowarmed" using data from caches in the old searcher.
116
+ autowarmCount is the number of items to prepopulate. For LRUCache,
117
+ the autowarmed items will be the most recently accessed items.
118
+ Parameters:
119
+ class - the SolrCache implementation (currently only LRUCache)
120
+ size - the maximum number of entries in the cache
121
+ initialSize - the initial capacity (number of entries) of
122
+ the cache. (see java.util.HashMap)
123
+ autowarmCount - the number of entries to prepopulate from
124
+ an old cache.
125
+ -->
126
+ <filterCache
127
+ class="solr.LRUCache"
128
+ size="512"
129
+ initialSize="512"
130
+ autowarmCount="256"/>
131
+
132
+ <!-- queryResultCache caches results of searches - ordered lists of
133
+ document ids (DocList) based on a query, a sort, and the range
134
+ of documents requested. -->
135
+ <queryResultCache
136
+ class="solr.LRUCache"
137
+ size="512"
138
+ initialSize="512"
139
+ autowarmCount="256"/>
140
+
141
+ <!-- documentCache caches Lucene Document objects (the stored fields for each document).
142
+ Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
143
+ <documentCache
144
+ class="solr.LRUCache"
145
+ size="512"
146
+ initialSize="512"
147
+ autowarmCount="0"/>
148
+
149
+ <!-- If true, stored fields that are not requested will be loaded lazily.
150
+ -->
151
+ <enableLazyFieldLoading>false</enableLazyFieldLoading>
152
+
153
+ <!-- Example of a generic cache. These caches may be accessed by name
154
+ through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
155
+ The purpose is to enable easy caching of user/application level data.
156
+ The regenerator argument should be specified as an implementation
157
+ of solr.search.CacheRegenerator if autowarming is desired. -->
158
+ <!--
159
+ <cache name="myUserCache"
160
+ class="solr.LRUCache"
161
+ size="4096"
162
+ initialSize="1024"
163
+ autowarmCount="1024"
164
+ regenerator="org.mycompany.mypackage.MyRegenerator"
165
+ />
166
+ -->
167
+
168
+ <!-- An optimization that attempts to use a filter to satisfy a search.
169
+ If the requested sort does not include score, then the filterCache
170
+ will be checked for a filter matching the query. If found, the filter
171
+ will be used as the source of document ids, and then the sort will be
172
+ applied to that.
173
+ <useFilterForSortedQuery>true</useFilterForSortedQuery>
174
+ -->
175
+
176
+ <!-- An optimization for use with the queryResultCache. When a search
177
+ is requested, a superset of the requested number of document ids
178
+ are collected. For example, if a search for a particular query
179
+ requests matching documents 10 through 19, and queryResultWindowSize is 50,
180
+ then documents 0 through 49 will be collected and cached. Any further
181
+ requests in that range can be satisfied via the cache. -->
182
+ <queryResultWindowSize>10</queryResultWindowSize>
183
+
184
+ <!-- This entry enables an int hash representation for filters (DocSets)
185
+ when the number of items in the set is less than maxSize. For smaller
186
+ sets, this representation is more memory efficient, more efficient to
187
+ iterate over, and faster to take intersections. -->
188
+ <HashDocSet maxSize="3000" loadFactor="0.75"/>
189
+
190
+
191
+ <!-- boolToFilterOptimizer converts boolean clauses with zero boost
192
+ into cached filters if the number of docs selected by the clause exceeds
193
+ the threshold (represented as a fraction of the total index) -->
194
+ <boolTofilterOptimizer enabled="true" cacheSize="32" threshold=".05"/>
195
+
196
+
197
+ <!-- a newSearcher event is fired whenever a new searcher is being prepared
198
+ and there is a current searcher handling requests (aka registered). -->
199
+ <!-- QuerySenderListener takes an array of NamedList and executes a
200
+ local query request for each NamedList in sequence. -->
201
+ <!--
202
+ <listener event="newSearcher" class="solr.QuerySenderListener">
203
+ <arr name="queries">
204
+ <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
205
+ <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
206
+ </arr>
207
+ </listener>
208
+ -->
209
+
210
+ <!-- a firstSearcher event is fired whenever a new searcher is being
211
+ prepared but there is no current registered searcher to handle
212
+ requests or to gain autowarming data from. -->
213
+ <!--
214
+ <listener event="firstSearcher" class="solr.QuerySenderListener">
215
+ <arr name="queries">
216
+ <lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
217
+ </arr>
218
+ </listener>
219
+ -->
220
+
221
+ <!-- If a search request comes in and there is no current registered searcher,
222
+ then immediately register the still warming searcher and use it. If
223
+ "false" then all requests will block until the first searcher is done
224
+ warming. -->
225
+ <useColdSearcher>false</useColdSearcher>
226
+
227
+ <!-- Maximum number of searchers that may be warming in the background
228
+ concurrently. An error is returned if this limit is exceeded. Recommend
229
+ 1-2 for read-only slaves, higher for masters w/o cache warming. -->
230
+ <maxWarmingSearchers>4</maxWarmingSearchers>
231
+
232
+ </query>
233
+
234
+ <!--
235
+ Let the dispatch filter handle /select?qt=XXX
236
+ handleSelect=true will use consistent error handling for /select and /update
237
+ handleSelect=false will use solr1.1 style error formatting
238
+ -->
239
+ <requestDispatcher handleSelect="true">
240
+ <!--Make sure your system has some authentication before enabling remote streaming! -->
241
+ <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
242
+ </requestDispatcher>
243
+
244
+
245
+ <!-- requestHandler plugins... incoming queries will be dispatched to the
246
+ correct handler based on the qt (query type) param matching the
247
+ name of registered handlers.
248
+ The "standard" request handler is the default and will be used if qt
249
+ is not specified in the request.
250
+ -->
251
+ <requestHandler name="standard" class="solr.StandardRequestHandler">
252
+ <!-- default values for query parameters -->
253
+ <lst name="defaults">
254
+ <str name="echoParams">explicit</str>
255
+ <!--
256
+ <int name="rows">10</int>
257
+ <str name="fl">*</str>
258
+ <str name="version">2.1</str>
259
+ -->
260
+ </lst>
261
+ </requestHandler>
262
+
263
+ <!-- DisMaxRequestHandler allows easy searching across multiple fields
264
+ for simple user-entered phrases.
265
+ see http://wiki.apache.org/solr/DisMaxRequestHandler
266
+ -->
267
+ <requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
268
+ <lst name="defaults">
269
+ <str name="echoParams">explicit</str>
270
+ <float name="tie">0.01</float>
271
+ <str name="qf">
272
+ text^0.5
273
+ </str>
274
+ <!-- <str name="pf"> -->
275
+ <!-- text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9 -->
276
+ <!-- </str> -->
277
+ <!-- <str name="bf"> -->
278
+ <!-- ord(poplarity)^0.5 recip(rord(price),1,1000,1000)^0.3 -->
279
+ <!-- </str> -->
280
+ <str name="fl">
281
+ id,test
282
+ </str>
283
+ <str name="mm">
284
+ 2&lt;-1 5&lt;-2 6&lt;90%
285
+ </str>
286
+ <int name="ps">100</int>
287
+ <str name="q.alt">*:*</str>
288
+ </lst>
289
+ </requestHandler>
290
+
291
+ <!-- Note how you can register the same handler multiple times with
292
+ different names (and different init parameters)
293
+ -->
294
+
295
+ <!-- SpellCheckerRequestHandler takes in a word (or several words) as the
296
+ value of the "q" parameter and returns a list of alternative spelling
297
+ suggestions. If invoked with a ...&cmd=rebuild, it will rebuild the
298
+ spellchecker index.
299
+ -->
300
+ <requestHandler name="spellchecker" class="solr.SpellCheckerRequestHandler">
301
+ <!-- default values for query parameters -->
302
+ <lst name="defaults">
303
+ <int name="suggestionCount">1</int>
304
+ <float name="accuracy">0.5</float>
305
+ </lst>
306
+
307
+ <!-- Main init params for handler -->
308
+
309
+ <!-- The directory where your SpellChecker Index should live. -->
310
+ <!-- May be absolute, or relative to the Solr "dataDir" directory. -->
311
+ <!-- If this option is not specified, a RAM directory will be used -->
312
+ <str name="spellcheckerIndexDir">spell</str>
313
+
314
+ <!-- the field in your schema that you want to be able to build -->
315
+ <!-- your spell index on. This should be a field that uses a very -->
316
+ <!-- simple FieldType without a lot of Analysis (ie: string) -->
317
+ <str name="termSourceField">word</str>
318
+
319
+ </requestHandler>
320
+
321
+
322
+ <!-- Update request handler.
323
+
324
+ Note: Since solr1.1 requestHandlers require a valid content type header if posted in
325
+ the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
326
+ The response format differs from solr1.1 formatting and returns a standard error code.
327
+
328
+ To enable solr1.1 behavior, remove the /update handler or change its path
329
+ -->
330
+ <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
331
+
332
+ <!-- CSV update handler, loaded on demand -->
333
+ <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
334
+
335
+ <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
336
+
337
+ <!-- Echo the request contents back to the client -->
338
+ <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
339
+ <lst name="defaults">
340
+ <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' -->
341
+ <str name="echoHandler">true</str>
342
+ </lst>
343
+ </requestHandler>
344
+
345
+ <!-- queryResponseWriter plugins... query responses will be written using the
346
+ writer specified by the 'wt' request parameter matching the name of a registered
347
+ writer.
348
+ The "standard" writer is the default and will be used if 'wt' is not specified
349
+ in the request. XMLResponseWriter will be used if nothing is specified here.
350
+ The json, python, and ruby writers are also available by default.
351
+
352
+ <queryResponseWriter name="standard" class="org.apache.solr.request.XMLResponseWriter"/>
353
+ <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/>
354
+ <queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/>
355
+ <queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/>
356
+
357
+ <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/>
358
+ -->
359
+
360
+ <!-- XSLT response writer transforms the XML output by any xslt file found
361
+ in Solr's conf/xslt directory. Changes to xslt files are checked for
362
+ every xsltCacheLifetimeSeconds.
363
+ -->
364
+ <queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
365
+ <int name="xsltCacheLifetimeSeconds">5</int>
366
+ </queryResponseWriter>
367
+
368
+ <!-- config for the admin interface -->
369
+ <admin>
370
+ <defaultQuery>solr</defaultQuery>
371
+ <!-- configure a healthcheck file for servers behind a loadbalancer
372
+ <healthcheck type="file">server-enabled</healthcheck>
373
+ -->
374
+ </admin>
375
+
376
+ </config>
@@ -0,0 +1,58 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one or more
2
+ # contributor license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright ownership.
4
+ # The ASF licenses this file to You under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with
6
+ # the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ #-----------------------------------------------------------------------
17
+ # a couple of test stopwords to test that the words are really being
18
+ # configured from this file:
19
+ stopworda
20
+ stopwordb
21
+
22
+ #Standard english stop words taken from Lucene's StopAnalyzer
23
+ a
24
+ an
25
+ and
26
+ are
27
+ as
28
+ at
29
+ be
30
+ but
31
+ by
32
+ for
33
+ if
34
+ in
35
+ into
36
+ is
37
+ it
38
+ no
39
+ not
40
+ of
41
+ on
42
+ or
43
+ s
44
+ such
45
+ t
46
+ that
47
+ the
48
+ their
49
+ then
50
+ there
51
+ these
52
+ they
53
+ this
54
+ to
55
+ was
56
+ will
57
+ with
58
+