solr_wrapper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,572 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ For more details about configuration options that may appear in
21
+ this file, see http://wiki.apache.org/solr/SolrConfigXml.
22
+ -->
23
+ <config>
24
+ <!-- In all configuration below, a prefix of "solr." for class names
25
+ is an alias that causes solr to search appropriate packages,
26
+ including org.apache.solr.(search|update|request|core|analysis)
27
+
28
+ You may also specify a fully qualified Java classname if you
29
+ have your own custom plugins.
30
+ -->
31
+
32
+ <!-- Controls what version of Lucene various components of Solr
33
+ adhere to. Generally, you want to use the latest version to
34
+ get all bug fixes and improvements. It is highly recommended
35
+ that you fully re-index after changing this setting as it can
36
+ affect both how text is indexed and queried.
37
+ -->
38
+ <luceneMatchVersion>5.0.0</luceneMatchVersion>
39
+
40
+ <!-- Data Directory
41
+
42
+ Used to specify an alternate directory to hold all index data
43
+ other than the default ./data under the Solr home. If
44
+ replication is in use, this should match the replication
45
+ configuration.
46
+ -->
47
+ <dataDir>${solr.data.dir:}</dataDir>
48
+
49
+
50
+ <!-- The DirectoryFactory to use for indexes.
51
+
52
+ solr.StandardDirectoryFactory is filesystem
53
+ based and tries to pick the best implementation for the current
54
+ JVM and platform. solr.NRTCachingDirectoryFactory, the default,
55
+ wraps solr.StandardDirectoryFactory and caches small files in memory
56
+ for better NRT performance.
57
+
58
+ One can force a particular implementation via solr.MMapDirectoryFactory,
59
+ solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
60
+
61
+ solr.RAMDirectoryFactory is memory based, not
62
+ persistent, and doesn't work with replication.
63
+ -->
64
+ <directoryFactory name="DirectoryFactory"
65
+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
66
+ </directoryFactory>
67
+
68
+ <!-- The CodecFactory for defining the format of the inverted index.
69
+ The default implementation is SchemaCodecFactory, which is the official Lucene
70
+ index format, but hooks into the schema to provide per-field customization of
71
+ the postings lists and per-document values in the fieldType element
72
+ (postingsFormat/docValuesFormat). Note that most of the alternative implementations
73
+ are experimental, so if you choose to customize the index format, it's a good
74
+ idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
75
+ before upgrading to a newer version to avoid unnecessary reindexing.
76
+ -->
77
+ <codecFactory class="solr.SchemaCodecFactory"/>
78
+
79
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
80
+
81
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
82
+ Index Config - These settings control low-level behavior of indexing
83
+ Most example settings here show the default value, but are commented
84
+ out, to more easily see where customizations have been made.
85
+
86
+ Note: This replaces <indexDefaults> and <mainIndex> from older versions
87
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
88
+ <indexConfig>
89
+
90
+ <!-- LockFactory
91
+
92
+ This option specifies which Lucene LockFactory implementation
93
+ to use.
94
+
95
+ single = SingleInstanceLockFactory - suggested for a
96
+ read-only index or when there is no possibility of
97
+ another process trying to modify the index.
98
+ native = NativeFSLockFactory - uses OS native file locking.
99
+ Do not use when multiple solr webapps in the same
100
+ JVM are attempting to share a single index.
101
+ simple = SimpleFSLockFactory - uses a plain file for locking
102
+
103
+ Defaults: 'native' is default for Solr3.6 and later, otherwise
104
+ 'simple' is the default
105
+
106
+ More details on the nuances of each LockFactory...
107
+ http://wiki.apache.org/lucene-java/AvailableLockFactories
108
+ -->
109
+ <lockType>${solr.lock.type:native}</lockType>
110
+
111
+ <!-- Lucene Infostream
112
+
113
+ To aid in advanced debugging, Lucene provides an "InfoStream"
114
+ of detailed information when indexing.
115
+
116
+ Setting the value to true will instruct the underlying Lucene
117
+ IndexWriter to write its info stream to solr's log. By default,
118
+ this is enabled here, and controlled through log4j.properties.
119
+ -->
120
+ <infoStream>true</infoStream>
121
+ </indexConfig>
122
+
123
+
124
+ <!-- JMX
125
+
126
+ This example enables JMX if and only if an existing MBeanServer
127
+ is found. Use this if you want to configure JMX through JVM
128
+ parameters. Remove this to disable exposing Solr configuration
129
+ and statistics to JMX.
130
+
131
+ For more details see http://wiki.apache.org/solr/SolrJmx
132
+ -->
133
+ <jmx />
134
+ <!-- If you want to connect to a particular server, specify the
135
+ agentId
136
+ -->
137
+ <!-- <jmx agentId="myAgent" /> -->
138
+ <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
139
+ <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
140
+ -->
141
+
142
+ <!-- The default high-performance update handler -->
143
+ <updateHandler class="solr.DirectUpdateHandler2">
144
+
145
+ <!-- Enables a transaction log, used for real-time get, durability, and
146
+ and solr cloud replica recovery. The log can grow as big as
147
+ uncommitted changes to the index, so use of a hard autoCommit
148
+ is recommended (see below).
149
+ "dir" - the target directory for transaction logs, defaults to the
150
+ solr data directory. -->
151
+ <updateLog>
152
+ <str name="dir">${solr.ulog.dir:}</str>
153
+ </updateLog>
154
+
155
+ <!-- AutoCommit
156
+
157
+ Perform a hard commit automatically under certain conditions.
158
+ Instead of enabling autoCommit, consider using "commitWithin"
159
+ when adding documents.
160
+
161
+ http://wiki.apache.org/solr/UpdateXmlMessages
162
+
163
+ maxDocs - Maximum number of documents to add since the last
164
+ commit before automatically triggering a new commit.
165
+
166
+ maxTime - Maximum amount of time in ms that is allowed to pass
167
+ since a document was added before automatically
168
+ triggering a new commit.
169
+ openSearcher - if false, the commit causes recent index changes
170
+ to be flushed to stable storage, but does not cause a new
171
+ searcher to be opened to make those changes visible.
172
+
173
+ If the updateLog is enabled, then it's highly recommended to
174
+ have some sort of hard autoCommit to limit the log size.
175
+ -->
176
+ <autoCommit>
177
+ <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
178
+ <openSearcher>false</openSearcher>
179
+ </autoCommit>
180
+
181
+ <!-- autoSoftCommit is like autoCommit except it causes a
182
+ 'soft' commit which only ensures that changes are visible
183
+ but does not ensure that data is synced to disk. This is
184
+ faster and more near-realtime friendly than a hard commit.
185
+ -->
186
+ <autoSoftCommit>
187
+ <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
188
+ </autoSoftCommit>
189
+
190
+ </updateHandler>
191
+
192
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
193
+ Query section - these settings control query time things like caches
194
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
195
+ <query>
196
+ <!-- Max Boolean Clauses
197
+
198
+ Maximum number of clauses in each BooleanQuery, an exception
199
+ is thrown if exceeded.
200
+
201
+ ** WARNING **
202
+
203
+ This option actually modifies a global Lucene property that
204
+ will affect all SolrCores. If multiple solrconfig.xml files
205
+ disagree on this property, the value at any given moment will
206
+ be based on the last SolrCore to be initialized.
207
+
208
+ -->
209
+ <maxBooleanClauses>1024</maxBooleanClauses>
210
+
211
+
212
+ <!-- Solr Internal Query Caches
213
+
214
+ There are two implementations of cache available for Solr,
215
+ LRUCache, based on a synchronized LinkedHashMap, and
216
+ FastLRUCache, based on a ConcurrentHashMap.
217
+
218
+ FastLRUCache has faster gets and slower puts in single
219
+ threaded operation and thus is generally faster than LRUCache
220
+ when the hit ratio of the cache is high (> 75%), and may be
221
+ faster under other scenarios on multi-cpu systems.
222
+ -->
223
+
224
+ <!-- Filter Cache
225
+
226
+ Cache used by SolrIndexSearcher for filters (DocSets),
227
+ unordered sets of *all* documents that match a query. When a
228
+ new searcher is opened, its caches may be prepopulated or
229
+ "autowarmed" using data from caches in the old searcher.
230
+ autowarmCount is the number of items to prepopulate. For
231
+ LRUCache, the autowarmed items will be the most recently
232
+ accessed items.
233
+
234
+ Parameters:
235
+ class - the SolrCache implementation
236
+ (LRUCache or FastLRUCache)
237
+ size - the maximum number of entries in the cache
238
+ initialSize - the initial capacity (number of entries) of
239
+ the cache. (see java.util.HashMap)
240
+ autowarmCount - the number of entries to prepopulate from
241
+ an old cache.
242
+ -->
243
+ <filterCache class="solr.FastLRUCache"
244
+ size="512"
245
+ initialSize="512"
246
+ autowarmCount="0"/>
247
+
248
+ <!-- Query Result Cache
249
+
250
+ Caches results of searches - ordered lists of document ids
251
+ (DocList) based on a query, a sort, and the range of documents requested.
252
+ -->
253
+ <queryResultCache class="solr.LRUCache"
254
+ size="512"
255
+ initialSize="512"
256
+ autowarmCount="0"/>
257
+
258
+ <!-- Document Cache
259
+
260
+ Caches Lucene Document objects (the stored fields for each
261
+ document). Since Lucene internal document ids are transient,
262
+ this cache will not be autowarmed.
263
+ -->
264
+ <documentCache class="solr.LRUCache"
265
+ size="512"
266
+ initialSize="512"
267
+ autowarmCount="0"/>
268
+
269
+ <!-- custom cache currently used by block join -->
270
+ <cache name="perSegFilter"
271
+ class="solr.search.LRUCache"
272
+ size="10"
273
+ initialSize="0"
274
+ autowarmCount="10"
275
+ regenerator="solr.NoOpRegenerator" />
276
+
277
+ <!-- Lazy Field Loading
278
+
279
+ If true, stored fields that are not requested will be loaded
280
+ lazily. This can result in a significant speed improvement
281
+ if the usual case is to not load all stored fields,
282
+ especially if the skipped fields are large compressed text
283
+ fields.
284
+ -->
285
+ <enableLazyFieldLoading>true</enableLazyFieldLoading>
286
+
287
+ <!-- Result Window Size
288
+
289
+ An optimization for use with the queryResultCache. When a search
290
+ is requested, a superset of the requested number of document ids
291
+ are collected. For example, if a search for a particular query
292
+ requests matching documents 10 through 19, and queryResultWindowSize is 50,
293
+ then documents 0 through 49 will be collected and cached. Any further
294
+ requests in that range can be satisfied via the cache.
295
+ -->
296
+ <queryResultWindowSize>20</queryResultWindowSize>
297
+
298
+ <!-- Maximum number of documents to cache for any entry in the
299
+ queryResultCache.
300
+ -->
301
+ <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
302
+
303
+ <!-- Use Cold Searcher
304
+
305
+ If a search request comes in and there is no current
306
+ registered searcher, then immediately register the still
307
+ warming searcher and use it. If "false" then all requests
308
+ will block until the first searcher is done warming.
309
+ -->
310
+ <useColdSearcher>false</useColdSearcher>
311
+
312
+ <!-- Max Warming Searchers
313
+
314
+ Maximum number of searchers that may be warming in the
315
+ background concurrently. An error is returned if this limit
316
+ is exceeded.
317
+
318
+ Recommend values of 1-2 for read-only slaves, higher for
319
+ masters w/o cache warming.
320
+ -->
321
+ <maxWarmingSearchers>2</maxWarmingSearchers>
322
+
323
+ </query>
324
+
325
+
326
+ <!-- Request Dispatcher
327
+
328
+ This section contains instructions for how the SolrDispatchFilter
329
+ should behave when processing requests for this SolrCore.
330
+
331
+ handleSelect is a legacy option that affects the behavior of requests
332
+ such as /select?qt=XXX
333
+
334
+ handleSelect="true" will cause the SolrDispatchFilter to process
335
+ the request and dispatch the query to a handler specified by the
336
+ "qt" param, assuming "/select" isn't already registered.
337
+
338
+ handleSelect="false" will cause the SolrDispatchFilter to
339
+ ignore "/select" requests, resulting in a 404 unless a handler
340
+ is explicitly registered with the name "/select"
341
+
342
+ handleSelect="true" is not recommended for new users, but is the default
343
+ for backwards compatibility
344
+ -->
345
+ <requestDispatcher handleSelect="false" >
346
+ <!-- Request Parsing
347
+
348
+ These settings indicate how Solr Requests may be parsed, and
349
+ what restrictions may be placed on the ContentStreams from
350
+ those requests
351
+
352
+ enableRemoteStreaming - enables use of the stream.file
353
+ and stream.url parameters for specifying remote streams.
354
+
355
+ multipartUploadLimitInKB - specifies the max size (in KiB) of
356
+ Multipart File Uploads that Solr will allow in a Request.
357
+
358
+ formdataUploadLimitInKB - specifies the max size (in KiB) of
359
+ form data (application/x-www-form-urlencoded) sent via
360
+ POST. You can use POST to pass request parameters not
361
+ fitting into the URL.
362
+
363
+ addHttpRequestToContext - if set to true, it will instruct
364
+ the requestParsers to include the original HttpServletRequest
365
+ object in the context map of the SolrQueryRequest under the
366
+ key "httpRequest". It will not be used by any of the existing
367
+ Solr components, but may be useful when developing custom
368
+ plugins.
369
+
370
+ *** WARNING ***
371
+ The settings below authorize Solr to fetch remote files. You
372
+ should make sure your system has some authentication before
373
+ using enableRemoteStreaming="true"
374
+
375
+ -->
376
+ <requestParsers enableRemoteStreaming="true"
377
+ multipartUploadLimitInKB="2048000"
378
+ formdataUploadLimitInKB="2048"
379
+ addHttpRequestToContext="false"/>
380
+
381
+ <!-- HTTP Caching
382
+
383
+ Set HTTP caching related parameters (for proxy caches and clients).
384
+
385
+ The options below instruct Solr not to output any HTTP Caching
386
+ related headers
387
+ -->
388
+ <httpCaching never304="true" />
389
+
390
+ </requestDispatcher>
391
+
392
+ <!-- Request Handlers
393
+
394
+ http://wiki.apache.org/solr/SolrRequestHandler
395
+
396
+ Incoming queries will be dispatched to a specific handler by name
397
+ based on the path specified in the request.
398
+
399
+ Legacy behavior: If the request path uses "/select" but no Request
400
+ Handler has that name, and if handleSelect="true" has been specified in
401
+ the requestDispatcher, then the Request Handler is dispatched based on
402
+ the qt parameter. Handlers without a leading '/' are accessed this way
403
+ like so: http://host/app/[core/]select?qt=name If no qt is
404
+ given, then the requestHandler that declares default="true" will be
405
+ used or the one named "standard".
406
+
407
+ If a Request Handler is declared with startup="lazy", then it will
408
+ not be initialized until the first request that uses it.
409
+
410
+ -->
411
+ <!-- SearchHandler
412
+
413
+ http://wiki.apache.org/solr/SearchHandler
414
+
415
+ For processing Search Queries, the primary Request Handler
416
+ provided with Solr is "SearchHandler". It delegates to a sequence
417
+ of SearchComponents (see below) and supports distributed
418
+ queries across multiple shards
419
+ -->
420
+ <requestHandler name="/select" class="solr.SearchHandler">
421
+ <!-- default values for query parameters can be specified, these
422
+ will be overridden by parameters in the request
423
+ -->
424
+ <lst name="defaults">
425
+ <str name="echoParams">explicit</str>
426
+ <int name="rows">10</int>
427
+ </lst>
428
+
429
+ </requestHandler>
430
+
431
+ <!-- A request handler that returns indented JSON by default -->
432
+ <requestHandler name="/query" class="solr.SearchHandler">
433
+ <lst name="defaults">
434
+ <str name="echoParams">explicit</str>
435
+ <str name="wt">json</str>
436
+ <str name="indent">true</str>
437
+ <str name="df">text</str>
438
+ </lst>
439
+ </requestHandler>
440
+
441
+ <!--
442
+ The export request handler is used to export full sorted result sets.
443
+ Do not change these defaults.
444
+ -->
445
+ <requestHandler name="/export" class="solr.SearchHandler">
446
+ <lst name="invariants">
447
+ <str name="rq">{!xport}</str>
448
+ <str name="wt">xsort</str>
449
+ <str name="distrib">false</str>
450
+ </lst>
451
+
452
+ <arr name="components">
453
+ <str>query</str>
454
+ </arr>
455
+ </requestHandler>
456
+
457
+
458
+ <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell">
459
+ <lst name="defaults">
460
+ <str name="df">text</str>
461
+ </lst>
462
+ </initParams>
463
+
464
+ <!-- Field Analysis Request Handler
465
+
466
+ RequestHandler that provides much the same functionality as
467
+ analysis.jsp. Provides the ability to specify multiple field
468
+ types and field names in the same request and outputs
469
+ index-time and query-time analysis for each of them.
470
+
471
+ Request parameters are:
472
+ analysis.fieldname - field name whose analyzers are to be used
473
+
474
+ analysis.fieldtype - field type whose analyzers are to be used
475
+ analysis.fieldvalue - text for index-time analysis
476
+ q (or analysis.q) - text for query time analysis
477
+ analysis.showmatch (true|false) - When set to true and when
478
+ query analysis is performed, the produced tokens of the
479
+ field value analysis will be marked as "matched" for every
480
+ token that is produced by the query analysis
481
+ -->
482
+ <requestHandler name="/analysis/field"
483
+ startup="lazy"
484
+ class="solr.FieldAnalysisRequestHandler" />
485
+
486
+
487
+ <!-- Document Analysis Handler
488
+
489
+ http://wiki.apache.org/solr/AnalysisRequestHandler
490
+
491
+ An analysis handler that provides a breakdown of the analysis
492
+ process of provided documents. This handler expects a (single)
493
+ content stream with the following format:
494
+
495
+ <docs>
496
+ <doc>
497
+ <field name="id">1</field>
498
+ <field name="name">The Name</field>
499
+ <field name="text">The Text Value</field>
500
+ </doc>
501
+ <doc>...</doc>
502
+ <doc>...</doc>
503
+ ...
504
+ </docs>
505
+
506
+ Note: Each document must contain a field which serves as the
507
+ unique key. This key is used in the returned response to associate
508
+ an analysis breakdown to the analyzed document.
509
+
510
+ Like the FieldAnalysisRequestHandler, this handler also supports
511
+ query analysis by sending either an "analysis.query" or "q"
512
+ request parameter that holds the query text to be analyzed. It
513
+ also supports the "analysis.showmatch" parameter which when set to
514
+ true, all field tokens that match the query tokens will be marked
515
+ as a "match".
516
+ -->
517
+ <requestHandler name="/analysis/document"
518
+ class="solr.DocumentAnalysisRequestHandler"
519
+ startup="lazy" />
520
+
521
+ <!-- Echo the request contents back to the client -->
522
+ <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
523
+ <lst name="defaults">
524
+ <str name="echoParams">explicit</str>
525
+ <str name="echoHandler">true</str>
526
+ </lst>
527
+ </requestHandler>
528
+
529
+
530
+
531
+ <!-- Search Components
532
+
533
+ Search components are registered to SolrCore and used by
534
+ instances of SearchHandler (which can access them by name)
535
+
536
+ By default, the following components are available:
537
+
538
+ <searchComponent name="query" class="solr.QueryComponent" />
539
+ <searchComponent name="facet" class="solr.FacetComponent" />
540
+ <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
541
+ <searchComponent name="highlight" class="solr.HighlightComponent" />
542
+ <searchComponent name="stats" class="solr.StatsComponent" />
543
+ <searchComponent name="debug" class="solr.DebugComponent" />
544
+
545
+ -->
546
+
547
+ <!-- Terms Component
548
+
549
+ http://wiki.apache.org/solr/TermsComponent
550
+
551
+ A component to return terms and document frequency of those
552
+ terms
553
+ -->
554
+ <searchComponent name="terms" class="solr.TermsComponent"/>
555
+
556
+ <!-- A request handler for demonstrating the terms component -->
557
+ <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
558
+ <lst name="defaults">
559
+ <bool name="terms">true</bool>
560
+ <bool name="distrib">false</bool>
561
+ </lst>
562
+ <arr name="components">
563
+ <str>terms</str>
564
+ </arr>
565
+ </requestHandler>
566
+
567
+ <!-- Legacy config for the admin interface -->
568
+ <admin>
569
+ <defaultQuery>*:*</defaultQuery>
570
+ </admin>
571
+
572
+ </config>