simple_solr_client 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +349 -0
- data/Rakefile +11 -0
- data/lib/simple_solr.rb +42 -0
- data/lib/simple_solr/client.rb +139 -0
- data/lib/simple_solr/client/core_admin.rb +0 -0
- data/lib/simple_solr/core.rb +50 -0
- data/lib/simple_solr/core/admin.rb +47 -0
- data/lib/simple_solr/core/core_data.rb +51 -0
- data/lib/simple_solr/core/index.rb +25 -0
- data/lib/simple_solr/core/search.rb +21 -0
- data/lib/simple_solr/response/document.rb +45 -0
- data/lib/simple_solr/response/generic_response.rb +19 -0
- data/lib/simple_solr/response/query_response.rb +54 -0
- data/lib/simple_solr/schema.rb +261 -0
- data/lib/simple_solr/schema/analysis.rb +58 -0
- data/lib/simple_solr/schema/copyfield.rb +42 -0
- data/lib/simple_solr/schema/dynamic_field.rb +23 -0
- data/lib/simple_solr/schema/field.rb +35 -0
- data/lib/simple_solr/schema/field_or_type.rb +112 -0
- data/lib/simple_solr/schema/field_type.rb +62 -0
- data/lib/simple_solr/schema/matcher.rb +16 -0
- data/lib/simple_solr/version.rb +3 -0
- data/simple_solr_client.gemspec +39 -0
- data/solr_sample_core/conf/_schema_analysis_stopwords_english.json +38 -0
- data/solr_sample_core/conf/_schema_analysis_synonyms_english.json +11 -0
- data/solr_sample_core/conf/admin-extra.html +24 -0
- data/solr_sample_core/conf/admin-extra.menu-bottom.html +25 -0
- data/solr_sample_core/conf/admin-extra.menu-top.html +25 -0
- data/solr_sample_core/conf/clustering/carrot2/kmeans-attributes.xml +19 -0
- data/solr_sample_core/conf/clustering/carrot2/lingo-attributes.xml +24 -0
- data/solr_sample_core/conf/clustering/carrot2/stc-attributes.xml +19 -0
- data/solr_sample_core/conf/currency.xml +67 -0
- data/solr_sample_core/conf/elevate.xml +38 -0
- data/solr_sample_core/conf/lang/contractions_ca.txt +8 -0
- data/solr_sample_core/conf/lang/contractions_fr.txt +15 -0
- data/solr_sample_core/conf/lang/contractions_ga.txt +5 -0
- data/solr_sample_core/conf/lang/contractions_it.txt +23 -0
- data/solr_sample_core/conf/lang/hyphenations_ga.txt +5 -0
- data/solr_sample_core/conf/lang/stemdict_nl.txt +6 -0
- data/solr_sample_core/conf/lang/stoptags_ja.txt +420 -0
- data/solr_sample_core/conf/lang/stopwords_ar.txt +125 -0
- data/solr_sample_core/conf/lang/stopwords_bg.txt +193 -0
- data/solr_sample_core/conf/lang/stopwords_ca.txt +220 -0
- data/solr_sample_core/conf/lang/stopwords_ckb.txt +136 -0
- data/solr_sample_core/conf/lang/stopwords_cz.txt +172 -0
- data/solr_sample_core/conf/lang/stopwords_da.txt +110 -0
- data/solr_sample_core/conf/lang/stopwords_de.txt +294 -0
- data/solr_sample_core/conf/lang/stopwords_el.txt +78 -0
- data/solr_sample_core/conf/lang/stopwords_en.txt +54 -0
- data/solr_sample_core/conf/lang/stopwords_es.txt +356 -0
- data/solr_sample_core/conf/lang/stopwords_eu.txt +99 -0
- data/solr_sample_core/conf/lang/stopwords_fa.txt +313 -0
- data/solr_sample_core/conf/lang/stopwords_fi.txt +97 -0
- data/solr_sample_core/conf/lang/stopwords_fr.txt +186 -0
- data/solr_sample_core/conf/lang/stopwords_ga.txt +110 -0
- data/solr_sample_core/conf/lang/stopwords_gl.txt +161 -0
- data/solr_sample_core/conf/lang/stopwords_hi.txt +235 -0
- data/solr_sample_core/conf/lang/stopwords_hu.txt +211 -0
- data/solr_sample_core/conf/lang/stopwords_hy.txt +46 -0
- data/solr_sample_core/conf/lang/stopwords_id.txt +359 -0
- data/solr_sample_core/conf/lang/stopwords_it.txt +303 -0
- data/solr_sample_core/conf/lang/stopwords_ja.txt +127 -0
- data/solr_sample_core/conf/lang/stopwords_lv.txt +172 -0
- data/solr_sample_core/conf/lang/stopwords_nl.txt +119 -0
- data/solr_sample_core/conf/lang/stopwords_no.txt +194 -0
- data/solr_sample_core/conf/lang/stopwords_pt.txt +253 -0
- data/solr_sample_core/conf/lang/stopwords_ro.txt +233 -0
- data/solr_sample_core/conf/lang/stopwords_ru.txt +243 -0
- data/solr_sample_core/conf/lang/stopwords_sv.txt +133 -0
- data/solr_sample_core/conf/lang/stopwords_th.txt +119 -0
- data/solr_sample_core/conf/lang/stopwords_tr.txt +212 -0
- data/solr_sample_core/conf/lang/userdict_ja.txt +29 -0
- data/solr_sample_core/conf/mapping-FoldToASCII.txt +3813 -0
- data/solr_sample_core/conf/mapping-ISOLatin1Accent.txt +246 -0
- data/solr_sample_core/conf/protwords.txt +21 -0
- data/solr_sample_core/conf/schema.xml +62 -0
- data/solr_sample_core/conf/scripts.conf +24 -0
- data/solr_sample_core/conf/solrconfig.xml +1702 -0
- data/solr_sample_core/conf/spellings.txt +2 -0
- data/solr_sample_core/conf/stopwords.txt +14 -0
- data/solr_sample_core/conf/syn.txt +0 -0
- data/solr_sample_core/conf/synonyms.txt +29 -0
- data/solr_sample_core/conf/token_fixing_charfilter.txt +110 -0
- data/solr_sample_core/conf/update-script.js +53 -0
- data/solr_sample_core/conf/velocity/README.txt +101 -0
- data/solr_sample_core/conf/velocity/VM_global_library.vm +175 -0
- data/solr_sample_core/conf/velocity/browse.vm +33 -0
- data/solr_sample_core/conf/velocity/cluster.vm +19 -0
- data/solr_sample_core/conf/velocity/cluster_results.vm +31 -0
- data/solr_sample_core/conf/velocity/debug.vm +28 -0
- data/solr_sample_core/conf/velocity/did_you_mean.vm +9 -0
- data/solr_sample_core/conf/velocity/error.vm +11 -0
- data/solr_sample_core/conf/velocity/facet_fields.vm +23 -0
- data/solr_sample_core/conf/velocity/facet_pivot.vm +12 -0
- data/solr_sample_core/conf/velocity/facet_queries.vm +12 -0
- data/solr_sample_core/conf/velocity/facet_ranges.vm +23 -0
- data/solr_sample_core/conf/velocity/facets.vm +10 -0
- data/solr_sample_core/conf/velocity/footer.vm +43 -0
- data/solr_sample_core/conf/velocity/head.vm +35 -0
- data/solr_sample_core/conf/velocity/header.vm +7 -0
- data/solr_sample_core/conf/velocity/hit.vm +25 -0
- data/solr_sample_core/conf/velocity/hit_grouped.vm +43 -0
- data/solr_sample_core/conf/velocity/hit_plain.vm +25 -0
- data/solr_sample_core/conf/velocity/join_doc.vm +20 -0
- data/solr_sample_core/conf/velocity/jquery.autocomplete.css +48 -0
- data/solr_sample_core/conf/velocity/jquery.autocomplete.js +763 -0
- data/solr_sample_core/conf/velocity/layout.vm +24 -0
- data/solr_sample_core/conf/velocity/main.css +230 -0
- data/solr_sample_core/conf/velocity/mime_type_lists.vm +68 -0
- data/solr_sample_core/conf/velocity/pagination_bottom.vm +22 -0
- data/solr_sample_core/conf/velocity/pagination_top.vm +29 -0
- data/solr_sample_core/conf/velocity/product_doc.vm +32 -0
- data/solr_sample_core/conf/velocity/query.vm +42 -0
- data/solr_sample_core/conf/velocity/query_form.vm +64 -0
- data/solr_sample_core/conf/velocity/query_group.vm +43 -0
- data/solr_sample_core/conf/velocity/query_spatial.vm +75 -0
- data/solr_sample_core/conf/velocity/results_list.vm +22 -0
- data/solr_sample_core/conf/velocity/richtext_doc.vm +153 -0
- data/solr_sample_core/conf/velocity/suggest.vm +8 -0
- data/solr_sample_core/conf/velocity/tabs.vm +50 -0
- data/solr_sample_core/conf/xslt/example.xsl +132 -0
- data/solr_sample_core/conf/xslt/example_atom.xsl +67 -0
- data/solr_sample_core/conf/xslt/example_rss.xsl +66 -0
- data/solr_sample_core/conf/xslt/luke.xsl +337 -0
- data/solr_sample_core/conf/xslt/updateXml.xsl +70 -0
- data/spec/client_basics_spec.rb +26 -0
- data/spec/connect_spec.rb +25 -0
- data/spec/core_basics.rb +21 -0
- data/spec/index_spec.rb +31 -0
- data/spec/load_spec.rb +7 -0
- data/spec/minitest_helper.rb +36 -0
- data/spec/schema_spec.rb +113 -0
- metadata +284 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6f960c75573490bad51ced465320ab2e824a15df
|
4
|
+
data.tar.gz: 0e70ff3edc2146651a56f578afd9a420120f85c3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f76f69c12364d15b6461c173309e3e614e073a7f1d7f04fa0a54f04a483a4a6647c4a0fb49eb9b9cc767386d209459aac39cf122db9c2262ed7c7eb6e541bff8
|
7
|
+
data.tar.gz: bec08b9cc97e2d9ac00fa9b34c10e78cc48ca0b9dcd788a5d967eab097bdba94f0eec34966b58f0e1c169930b83f5ba217ce61735711f1aa868381490a99eaa0
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Bill Dueber
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,349 @@
|
|
1
|
+
# SimpleSolrClient
|
2
|
+
|
3
|
+
[Note: still woefully incomplete, but in the spirit of "release early,
|
4
|
+
even if it's bad", here it is.]
|
5
|
+
|
6
|
+
A Solr client specifically designed to try to help you test what the heck
|
7
|
+
solr is actually doing.
|
8
|
+
|
9
|
+
Most useful when running on the same machine as the solr install, but
|
10
|
+
still useful even when you're not.
|
11
|
+
|
12
|
+
|
13
|
+
## Motivation
|
14
|
+
|
15
|
+
Solr is complex.
|
16
|
+
|
17
|
+
It's complex enough, and fuddles with enough edge cases, that reading
|
18
|
+
the documentation and/or the code doesn't get me the understanding
|
19
|
+
that I feel I need.
|
20
|
+
|
21
|
+
If I were smarter, maybe I wouldn't need something like this.
|
22
|
+
|
23
|
+
I wanted a way to test what solr is actually doing, and
|
24
|
+
this library is a way for me to start to do that in a fashion that's
|
25
|
+
more convenient than doing everything "by hand" in the admin dashboard
|
26
|
+
or running queries via URLs in my browser or using curl.
|
27
|
+
|
28
|
+
I wanted a way to figure out what fields (of what types) are being created,
|
29
|
+
how things were being tokenized, etc., but all within the comfort of a test
|
30
|
+
suite that I could run against solr configurations to make sure things
|
31
|
+
weren't breaking when I made changes. I wanted to build up a structure around relevance
|
32
|
+
ranking tests (still coming, sadly) and quickly swap out different
|
33
|
+
configs to make sure it all works as I expect.
|
34
|
+
|
35
|
+
So: a simple solr library, with more exposure than most of what's out there
|
36
|
+
to the solr administration API and the introspection/analysis it affords.
|
37
|
+
|
38
|
+
# Features:
|
39
|
+
|
40
|
+
* Basic add/delete/query
|
41
|
+
* Commit/optimize/clear an index
|
42
|
+
* Reload a core after editing/adjusting a config file
|
43
|
+
* Inspect lists of fields, dynamicFields, copyFields, and
|
44
|
+
fieldTypes
|
45
|
+
* Determine which fields (and their properties) would be
|
46
|
+
created when a given field name is indexed, taking into
|
47
|
+
account dynamicField and copyField directives.
|
48
|
+
* Get list of the tokens that would be created if you
|
49
|
+
send a string to a particular fieldType (like in the
|
50
|
+
solr admin analysis page)
|
51
|
+
* Spit a modified schema object back out as xml for
|
52
|
+
saving somewhere if you'd like
|
53
|
+
|
54
|
+
Additional features when running against a localhost solr:
|
55
|
+
* Spin up a temporary core to play with
|
56
|
+
* Add/remove fields, dynamic_fields, copy_fields, and field types
|
57
|
+
on the fly and save them back, ready for a reload
|
58
|
+
* Create temporary cores for doing testing
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
## Basic add and delete of documents, and simple queries
|
63
|
+
|
64
|
+
Right now, it supports only the most basic add/delete/query operations.
|
65
|
+
Adding in support for more complex queries is on the TODO list, but took
|
66
|
+
a back seat to dealing with the schema.
|
67
|
+
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
|
71
|
+
# A "client" points to a running solr, independent of the particular core
|
72
|
+
# You get a core from it.
|
73
|
+
|
74
|
+
client = SimpleSolrClient::Client.new('http://localhost:8983/solr')
|
75
|
+
core = client.core('core1') # must already exist!
|
76
|
+
core.url #=> "http://localhost:8983/solr/core1"
|
77
|
+
|
78
|
+
core.name #=> 'core1'
|
79
|
+
core.number_of_documents #=> 7, what was in there already
|
80
|
+
core.instance_dir #=> "/Users/dueberb/devel/java/solr/example/solr/collection1/"
|
81
|
+
core.schema_file #=> <path>/<to>/<schema.xml>
|
82
|
+
|
83
|
+
# Remove all the indexed documents and (automatically) commit
|
84
|
+
core.clear
|
85
|
+
|
86
|
+
# Add documents
|
87
|
+
#
|
88
|
+
# name_t is a text_general, multiValued, indexed, stored field
|
89
|
+
h1 = {:id => 'b', :name_t=>"Bill Dueber"}
|
90
|
+
h2 = {:id => 'd', :name_t=>"Danit Brown"}
|
91
|
+
h3 = {:id => 'z', :name_t=>"Ziv Brown Dueber"}
|
92
|
+
|
93
|
+
core.add_docs(h1)
|
94
|
+
|
95
|
+
core.number_of_documents #=> 0? But why? Oh, right...
|
96
|
+
core.commit
|
97
|
+
core.number_of_documents #=> 1 There we go
|
98
|
+
|
99
|
+
# You can chain many core operations
|
100
|
+
core.clear.add_docs([h1,h2, h3]).commit.optimize.number_of_documents #=> 3
|
101
|
+
|
102
|
+
# only the most basic querying is currently supported
|
103
|
+
# Result of a query is a QueryResponse, which contains a list of Document
|
104
|
+
# objects, which respond to ['fieldname']
|
105
|
+
|
106
|
+
# `all` brings back all documents up to the page limit
|
107
|
+
core.all.size #=> 3
|
108
|
+
core.all.map{|d| d['name_t']} #=> [['Bill Dueber'], ['Danit Brown'], ['Ziv Brown Dueber']]
|
109
|
+
|
110
|
+
# Simple field/value search
|
111
|
+
docs = core.fv_search(:name_t, 'Brown')
|
112
|
+
docs.class #=> SimpleSolrClient::Response::QueryResponse
|
113
|
+
|
114
|
+
docs.size #=> 2
|
115
|
+
docs.map{|d| d['name_t']} #=> [['Danit Brown'], ['Ziv Brown Dueber']]
|
116
|
+
|
117
|
+
# Special-case id/score as regular methods
|
118
|
+
docs.first.id #=> 'd'
|
119
|
+
docs.first.score #=> 0.625
|
120
|
+
|
121
|
+
# Figure out where documents fall. "Ziv Brown Dueber" contains both
|
122
|
+
# search terms, so should come first
|
123
|
+
docs = core.fv_search(:name_t, 'Brown Dueber')
|
124
|
+
docs.size #=> 3
|
125
|
+
|
126
|
+
docs.rank('z') #=> 1 (check by id)
|
127
|
+
docs.rank('z') < docs.rank('b') #=> true
|
128
|
+
|
129
|
+
# Of course, we can do it by score
|
130
|
+
docs.score('z') > docs.score('d')
|
131
|
+
|
132
|
+
# In addition to #clear, we can delete by simple query
|
133
|
+
core.delete('name_t:Dueber').commit.number_of_documents #=> 1
|
134
|
+
|
135
|
+
|
136
|
+
```
|
137
|
+
|
138
|
+
## The `schema` object
|
139
|
+
|
140
|
+
Each core exposes a `schema` object that allows you to find out about
|
141
|
+
the fields, copyfields, and field types, and (on localhost) muck
|
142
|
+
with the system on the fly.
|
143
|
+
|
144
|
+
The schema object is initially created by using the admin api to
|
145
|
+
get lists of fields and field types, and the XML for the field types
|
146
|
+
is derived by parsing out the schema.xml returned by the api call. Solr
|
147
|
+
does *not* expand entities in the returned XML, so if you have `system`
|
148
|
+
entities (e.g., you're including stuff off of disk), SimpleSolrClient won't
|
149
|
+
get that text and things will likely blow up.
|
150
|
+
|
151
|
+
|
152
|
+
```ruby
|
153
|
+
|
154
|
+
# Get a list of cores
|
155
|
+
client.cores #=> ['core1']
|
156
|
+
core = client.core('core1')
|
157
|
+
|
158
|
+
# Get an object representing the schema.xml file
|
159
|
+
schema = core.schema #=> SimpleSolrClient::Schema object
|
160
|
+
|
161
|
+
# Get lists of field, dynamicFields, copyFields, and fieldTypes
|
162
|
+
# all as SimpleSolrClient::Schema::XXX objects
|
163
|
+
|
164
|
+
explicit_fields = schema.fields
|
165
|
+
dynamic_fields = schema.dynamic_fields
|
166
|
+
copy_fields = schema.copy_fields
|
167
|
+
field_types = schema.field_types
|
168
|
+
|
169
|
+
```
|
170
|
+
|
171
|
+
### Regular fields
|
172
|
+
|
173
|
+
Internally I call these "explicit_fields" as opposed to dynamic fields.
|
174
|
+
|
175
|
+
```
|
176
|
+
f = schema.field('id')
|
177
|
+
f.name #=> 'id'
|
178
|
+
f.type.name #=> 'string'
|
179
|
+
f.type.solr_class #=> 'solr.StrField'
|
180
|
+
|
181
|
+
# Basic attributes
|
182
|
+
# These will fall back on the fieldType if not defined for a
|
183
|
+
# particular field.
|
184
|
+
|
185
|
+
f.stored #=> true
|
186
|
+
f.indexed #=> true
|
187
|
+
f.multi #=> nil # defined on neither field 'id' nor fieldType 'string'
|
188
|
+
|
189
|
+
# We implement a matcher, which is just string equality
|
190
|
+
f.matches('id') #=> true
|
191
|
+
f.matches('id_t') #=>false
|
192
|
+
|
193
|
+
# You can add fields, and save it back if you're on
|
194
|
+
# localhost
|
195
|
+
|
196
|
+
schema.add_field Field.new(:name=>'format', :type_name=>'string', :multi=>true, :stored=>false, :indexed=>true)
|
197
|
+
|
198
|
+
schema.write; core.reload # only on localhost
|
199
|
+
|
200
|
+
core.schema.field('format').type.name #=> 'string'
|
201
|
+
|
202
|
+
```
|
203
|
+
|
204
|
+
### Dynamic fields
|
205
|
+
|
206
|
+
The rule Solr uses for dynamic fields is "longest one wins".
|
207
|
+
Right now, I'm only handling _leading_ asterisks, so `*_t` will
|
208
|
+
work, but `text_*` will not.
|
209
|
+
|
210
|
+
```
|
211
|
+
schema.dynamic_fields.size #=> 23
|
212
|
+
f = schema.dynamic_field('*_t') #=> SimpleSolrClient::Schema::DynamicField
|
213
|
+
f.name #=> '*_t'
|
214
|
+
f.type.name #=> 'text_general'
|
215
|
+
f.stored #=> true
|
216
|
+
f.matches('name_t') #=> true
|
217
|
+
f.matches('name_t_i') #=> false
|
218
|
+
f.matches('name') #=> false
|
219
|
+
|
220
|
+
# Dynamic Fields can also be added
|
221
|
+
schema.add_dynamic_field(:name=>"*_f", :type_name=>'float')
|
222
|
+
|
223
|
+
```
|
224
|
+
|
225
|
+
### Copy Fields
|
226
|
+
|
227
|
+
CopyFields are a different beast: they only have a source and a dest, and
|
228
|
+
they can have multiple targets. For that reason, the interface is slightly
|
229
|
+
different (`#copy_fields_for` instead of just `#copy_field`)
|
230
|
+
|
231
|
+
```
|
232
|
+
|
233
|
+
# <copyField source="*_t_s", dest="*_t"/>
|
234
|
+
# <copyField source="*_t_s", dest="*_s"/>
|
235
|
+
|
236
|
+
cfs = schema.copy_fields_for('*_t_s')
|
237
|
+
cfs.size #=> 2
|
238
|
+
cfs.map(&:dest) #=> ["*_t", "*_s"]
|
239
|
+
|
240
|
+
cf = SimpleSolrClient::Schema::CopyField.new('title', 'allfields')
|
241
|
+
cf.source #=> 'title'
|
242
|
+
cf.dest #=> 'allfields'
|
243
|
+
|
244
|
+
schema.add_copy_field(cf)
|
245
|
+
```
|
246
|
+
|
247
|
+
|
248
|
+
### Field Types
|
249
|
+
|
250
|
+
Field Types are created by getting data from the API and also
|
251
|
+
parsing XML out of the schema.xml (for later creating a new
|
252
|
+
schema.xml if you'd like).
|
253
|
+
|
254
|
+
You can also ask a field type how it would tokenize an input
|
255
|
+
string via indexing or querying.
|
256
|
+
|
257
|
+
|
258
|
+
FieldTypes _should_ be able to, say, report their XML serialization even
|
259
|
+
when outside of a particular schema object, but right now that doesn't
|
260
|
+
work. If you make changes to a field type, the only way to see the new
|
261
|
+
serialization is to call `schema.to_xml` on whichever schema you added
|
262
|
+
it to via `schema.add_field_type(ft)`
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
```ruby
|
267
|
+
|
268
|
+
schema.field_types.size #=> 23
|
269
|
+
ft = schema.field_type('text') #=> SimpleSolrClient::Schema::FieldType
|
270
|
+
ft.name #=> 'text'
|
271
|
+
ft.solr_class #=> 'solr.TextField'
|
272
|
+
ft.multi #=> true
|
273
|
+
ft.stored #=> true
|
274
|
+
ft.indexed #=> true
|
275
|
+
# etc.
|
276
|
+
|
277
|
+
newft = SimpleSolrClient::Schema::FieldType.new_from_xml(xmlstring)
|
278
|
+
schema.add_field_type(newft)
|
279
|
+
|
280
|
+
ft.name #=> text
|
281
|
+
ft.query_tokens "Don't forget me when I'm getting H20"
|
282
|
+
#=> ["don't", "forget", "me", "when", "i'm", ["getting", "get"], "h20"]
|
283
|
+
|
284
|
+
ft.index_tokens 'When it rains, it pours'
|
285
|
+
#=> ["when", "it", ["rains", "rain"], "it", ["pours", "pour"]]
|
286
|
+
|
287
|
+
```
|
288
|
+
|
289
|
+
|
290
|
+
## What will I get if I index a field named `str`?
|
291
|
+
|
292
|
+
Dynamic- and copy-fields are very convenient, but it can make it hard to
|
293
|
+
figure out what you're actually going to get in your indexed and
|
294
|
+
stored fields. I started thinking about this [at the end of this blog post](http://robotlibrarian.billdueber.com/2014/10/schemaless-solr-with-dynamicfield-and-copyfield/)
|
295
|
+
|
296
|
+
`schema.resulting_fields(str)` will take the field name given and
|
297
|
+
figure out what fields would be generated, returning an array of field
|
298
|
+
objects (which are created wholesale if need be due to dynamicFields or
|
299
|
+
copyFields).
|
300
|
+
|
301
|
+
```ruby
|
302
|
+
rs = schema.resulting_fields('name_t_s')
|
303
|
+
rs.size #=> 3
|
304
|
+
|
305
|
+
rs.map{|f| [f.name, f.type.name]}
|
306
|
+
#=> [["name_t_s", "ignored"], ["name_t", "text"], ["name", "string"]]
|
307
|
+
|
308
|
+
rs.find_all{|f| f.stored}.map(&:name) #=> ["name"]
|
309
|
+
rs.find_all{|f| f.indexed}.map(&:name) #=> ['name_t']
|
310
|
+
|
311
|
+
|
312
|
+
|
313
|
+
```
|
314
|
+
|
315
|
+
|
316
|
+
## Saving/reloading a changed schema
|
317
|
+
|
318
|
+
Whether you change a solr install via editing a text file or
|
319
|
+
by using `schema.write`, you can always reload a core.
|
320
|
+
|
321
|
+
```ruby
|
322
|
+
core.reload
|
323
|
+
```
|
324
|
+
|
325
|
+
If you're working on localhost, you can make programmatic changes
|
326
|
+
to the schema and then ask for a write/reload cycle. It uses the API
|
327
|
+
to find the path to the schema.xml file and overwrites it.
|
328
|
+
|
329
|
+
```ruby
|
330
|
+
|
331
|
+
schema = core.schema
|
332
|
+
schema.add_field Field.new(:name=>'price', :type_name=>'float')
|
333
|
+
schema.write
|
334
|
+
schema = core.reload.schema
|
335
|
+
```
|
336
|
+
|
337
|
+
|
338
|
+
## Installation
|
339
|
+
|
340
|
+
$ gem install simple_solr_client
|
341
|
+
|
342
|
+
|
343
|
+
## Contributing
|
344
|
+
|
345
|
+
1. Fork it ( https://github.com/billdueber/simple_solr/fork )
|
346
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
347
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
348
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
349
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/simple_solr.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
module SimpleSolrClient

  # Characters that carry special meaning in the lucene query syntax.
  # '/' is included because newer lucene query parsers treat it as the
  # regular-expression delimiter; escaping it is harmless on older versions.
  ESCAPE_CHARS = '+-&|!(){}[]^"~*?:\\/'.freeze

  # Maps each special character to its backslash-escaped form.
  ESCAPE_MAP = ESCAPE_CHARS.each_char.each_with_object({}) { |c, h| h[c] = "\\" + c }.freeze

  # Matches any single character listed in ESCAPE_CHARS.
  ESCAPE_PAT = Regexp.new('[' + Regexp.quote(ESCAPE_CHARS) + ']')

  # Escape those characters that need escaping to be valid lucene syntax.
  # Is *not* called internally, since how am I supposed to know if the parens/quotes are a
  # part of your string or there for legal lucene grouping?
  #
  # @param [#to_s] str The value to escape (nil becomes the empty string)
  # @return [String] a new string with every special character backslash-escaped
  def self.lucene_escape(str)
    str.to_s.gsub(ESCAPE_PAT, ESCAPE_MAP)
  end


  # Where is the sample core configuration?
  SAMPLE_CORE_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', 'solr_sample_core'))

end
|
20
|
+
|
21
|
+
require 'httpclient'
|
22
|
+
require 'forwardable'
|
23
|
+
|
24
|
+
# Choose a JSON-compatible json parser/producer
|
25
|
+
if defined? JRUBY_VERSION
|
26
|
+
require 'json'
|
27
|
+
else
|
28
|
+
require 'oj'
|
29
|
+
Oj.mimic_JSON
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
require "simple_solr/version"
|
36
|
+
|
37
|
+
# Need to load core before client because of inter-dependencies resulting
|
38
|
+
# in 'require' recursion
|
39
|
+
|
40
|
+
require 'simple_solr/core'
|
41
|
+
require 'simple_solr/client'
|
42
|
+
|
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'fileutils'
require 'securerandom'
require 'tmpdir'

require 'httpclient'

require 'simple_solr/response/generic_response'
require 'simple_solr/core'
|
6
|
+
|
7
|
+
module SimpleSolrClient

  # A Client talks to the Solr instance; use a SimpleSolrClient::Core to talk to a
  # particular core.

  class Client

    attr_reader :base_url, :rawclient

    # @param [String] url URL of the running solr; one trailing slash is dropped
    def initialize(url)
      @base_url   = url.chomp('/')
      # Starts out identical to @base_url; top_level_url is built from this copy.
      # NOTE(review): presumably a subclass repoints @base_url at a core -- confirm.
      @client_url = @base_url
      @rawclient  = HTTPClient.new
    end

    # Construct a URL for the given arguments that hit the configured solr
    # @return [String] the new url, based on the base_url and the passed args
    def url(*args)
      [@base_url, *args].join('/').chomp('/')
    end

    # Sometimes, you just gotta have a top_level_url (as opposed to a
    # core-level URL)
    # @return [String] the new url, based on the client-level url and the passed args
    def top_level_url(*args)
      [@client_url, *args].join('/').chomp('/')
    end


    # Call a get on the underlying http client and return the content
    # You can pass in :force_top_level_url=>true for those cases when
    # you absolutely have to use the client-level url and not a
    # core level URL
    # @param [String] path The part of the URL that comes after the base
    # @param [Hash] args The url arguments; the caller's hash is left untouched
    # @return [String] the raw response body
    def raw_get_content(path, args={})
      # Work on a copy so the control flag can be stripped without
      # modifying the hash the caller handed in.
      params = args.dup
      target = params.delete(:force_top_level_url) ? top_level_url(path) : url(path)
      @rawclient.get(target, params).content
    end

    # A basic get to the instance (not any specific core)
    # @param [String] path The parts of the URL that come after the core
    # @param [Hash] args The url arguments
    # @return [Hash] the parsed-out response
    # @raise [RuntimeError] if the parsed response has an 'error' entry
    def _get(path, args={})
      # Non-destructive variants: the caller's (possibly frozen) path and
      # argument hash are never modified.
      clean_path = path.sub(/\A\//, '')
      params     = args.merge('wt' => 'json')
      res = JSON.parse(raw_get_content(clean_path, params))
      raise RuntimeError, res['error'] if res['error']
      res
    end

    # post JSON data.
    # @param [String] path The parts of the URL that come after the core
    # @param [Hash,Array] object_to_post The data to post as json
    # @return [Hash] the parsed-out response
    def _post_json(path, object_to_post)
      resp = @rawclient.post(url(path), JSON.dump(object_to_post), {'Content-type' => 'application/json'})
      JSON.parse(resp.content)
    end

    # Get from solr, and return a Response object of some sort
    # @param [String] path The parts of the URL that come after the core
    # @param [Hash] args The url arguments
    # @param [Class, nil] response_type Class used to wrap the parsed response;
    #   defaults to SimpleSolrClient::Response::GenericResponse
    # @return [SimpleSolrClient::Response, response_type]
    def get(path, args = {}, response_type = nil)
      response_type ||= SimpleSolrClient::Response::GenericResponse
      response_type.new(_get(path, args))
    end

    # Post an object as JSON and return a Response object
    # @param [String] path The parts of the URL that come after the core
    # @param [Hash,Array] object_to_post The data to post as json
    # @param [Class, nil] response_type Class used to wrap the parsed response;
    #   defaults to SimpleSolrClient::Response::GenericResponse
    # @return [SimpleSolrClient::Response, response_type]
    def post_json(path, object_to_post, response_type = nil)
      response_type ||= SimpleSolrClient::Response::GenericResponse
      response_type.new(_post_json(path, object_to_post))
    end


    # Get a client specific to the given core
    # @param [String] corename The name of the core (which must already exist!)
    # @return [SimpleSolrClient::Core]
    def core(corename)
      SimpleSolrClient::Core.new(@base_url, corename)
    end


    # List the cores reported by the instance's admin/cores endpoint
    # @return [Array<String>] the keys of the response's status section
    def cores
      get('admin/cores', {:force_top_level_url=>true}).status.keys
    end


    # Create a new, temporary core
    # @param [String] corename Name for the new core
    # @return [SimpleSolrClient::Core] a core object for the new core
    #noinspection RubyWrongHash
    def new_core(corename)
      dir = temp_core_dir_setup(corename)

      args = {
          :wt => 'json',
          :action => 'CREATE',
          :name => corename,
          :instanceDir => dir
      }

      get('admin/cores', args)
      core(corename)
    end

    # Create a core with a unique 'sstemp_'-prefixed name
    # @return [SimpleSolrClient::Core]
    def temp_core
      new_core('sstemp_' + SecureRandom.uuid)
    end

    # Set up files for a temp core
    # @param [String] corename Used as part of the temp directory's name
    # @return [String] path of the new directory holding a copy of the sample core
    def temp_core_dir_setup(corename)
      dest = Dir.mktmpdir("simple_solr_#{corename}")
      # Copying "<src>/." copies the directory's contents, not the directory itself
      FileUtils.cp_r File.join(SAMPLE_CORE_DIR, '.'), dest
      dest
    end

    # Unload all cores whose name includes 'sstemp'
    def unload_temp_cores
      cores.grep(/sstemp/).each { |name| core(name).unload }
    end

  end

end
|