simple_solr_client 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +349 -0
- data/Rakefile +11 -0
- data/lib/simple_solr.rb +42 -0
- data/lib/simple_solr/client.rb +139 -0
- data/lib/simple_solr/client/core_admin.rb +0 -0
- data/lib/simple_solr/core.rb +50 -0
- data/lib/simple_solr/core/admin.rb +47 -0
- data/lib/simple_solr/core/core_data.rb +51 -0
- data/lib/simple_solr/core/index.rb +25 -0
- data/lib/simple_solr/core/search.rb +21 -0
- data/lib/simple_solr/response/document.rb +45 -0
- data/lib/simple_solr/response/generic_response.rb +19 -0
- data/lib/simple_solr/response/query_response.rb +54 -0
- data/lib/simple_solr/schema.rb +261 -0
- data/lib/simple_solr/schema/analysis.rb +58 -0
- data/lib/simple_solr/schema/copyfield.rb +42 -0
- data/lib/simple_solr/schema/dynamic_field.rb +23 -0
- data/lib/simple_solr/schema/field.rb +35 -0
- data/lib/simple_solr/schema/field_or_type.rb +112 -0
- data/lib/simple_solr/schema/field_type.rb +62 -0
- data/lib/simple_solr/schema/matcher.rb +16 -0
- data/lib/simple_solr/version.rb +3 -0
- data/simple_solr_client.gemspec +39 -0
- data/solr_sample_core/conf/_schema_analysis_stopwords_english.json +38 -0
- data/solr_sample_core/conf/_schema_analysis_synonyms_english.json +11 -0
- data/solr_sample_core/conf/admin-extra.html +24 -0
- data/solr_sample_core/conf/admin-extra.menu-bottom.html +25 -0
- data/solr_sample_core/conf/admin-extra.menu-top.html +25 -0
- data/solr_sample_core/conf/clustering/carrot2/kmeans-attributes.xml +19 -0
- data/solr_sample_core/conf/clustering/carrot2/lingo-attributes.xml +24 -0
- data/solr_sample_core/conf/clustering/carrot2/stc-attributes.xml +19 -0
- data/solr_sample_core/conf/currency.xml +67 -0
- data/solr_sample_core/conf/elevate.xml +38 -0
- data/solr_sample_core/conf/lang/contractions_ca.txt +8 -0
- data/solr_sample_core/conf/lang/contractions_fr.txt +15 -0
- data/solr_sample_core/conf/lang/contractions_ga.txt +5 -0
- data/solr_sample_core/conf/lang/contractions_it.txt +23 -0
- data/solr_sample_core/conf/lang/hyphenations_ga.txt +5 -0
- data/solr_sample_core/conf/lang/stemdict_nl.txt +6 -0
- data/solr_sample_core/conf/lang/stoptags_ja.txt +420 -0
- data/solr_sample_core/conf/lang/stopwords_ar.txt +125 -0
- data/solr_sample_core/conf/lang/stopwords_bg.txt +193 -0
- data/solr_sample_core/conf/lang/stopwords_ca.txt +220 -0
- data/solr_sample_core/conf/lang/stopwords_ckb.txt +136 -0
- data/solr_sample_core/conf/lang/stopwords_cz.txt +172 -0
- data/solr_sample_core/conf/lang/stopwords_da.txt +110 -0
- data/solr_sample_core/conf/lang/stopwords_de.txt +294 -0
- data/solr_sample_core/conf/lang/stopwords_el.txt +78 -0
- data/solr_sample_core/conf/lang/stopwords_en.txt +54 -0
- data/solr_sample_core/conf/lang/stopwords_es.txt +356 -0
- data/solr_sample_core/conf/lang/stopwords_eu.txt +99 -0
- data/solr_sample_core/conf/lang/stopwords_fa.txt +313 -0
- data/solr_sample_core/conf/lang/stopwords_fi.txt +97 -0
- data/solr_sample_core/conf/lang/stopwords_fr.txt +186 -0
- data/solr_sample_core/conf/lang/stopwords_ga.txt +110 -0
- data/solr_sample_core/conf/lang/stopwords_gl.txt +161 -0
- data/solr_sample_core/conf/lang/stopwords_hi.txt +235 -0
- data/solr_sample_core/conf/lang/stopwords_hu.txt +211 -0
- data/solr_sample_core/conf/lang/stopwords_hy.txt +46 -0
- data/solr_sample_core/conf/lang/stopwords_id.txt +359 -0
- data/solr_sample_core/conf/lang/stopwords_it.txt +303 -0
- data/solr_sample_core/conf/lang/stopwords_ja.txt +127 -0
- data/solr_sample_core/conf/lang/stopwords_lv.txt +172 -0
- data/solr_sample_core/conf/lang/stopwords_nl.txt +119 -0
- data/solr_sample_core/conf/lang/stopwords_no.txt +194 -0
- data/solr_sample_core/conf/lang/stopwords_pt.txt +253 -0
- data/solr_sample_core/conf/lang/stopwords_ro.txt +233 -0
- data/solr_sample_core/conf/lang/stopwords_ru.txt +243 -0
- data/solr_sample_core/conf/lang/stopwords_sv.txt +133 -0
- data/solr_sample_core/conf/lang/stopwords_th.txt +119 -0
- data/solr_sample_core/conf/lang/stopwords_tr.txt +212 -0
- data/solr_sample_core/conf/lang/userdict_ja.txt +29 -0
- data/solr_sample_core/conf/mapping-FoldToASCII.txt +3813 -0
- data/solr_sample_core/conf/mapping-ISOLatin1Accent.txt +246 -0
- data/solr_sample_core/conf/protwords.txt +21 -0
- data/solr_sample_core/conf/schema.xml +62 -0
- data/solr_sample_core/conf/scripts.conf +24 -0
- data/solr_sample_core/conf/solrconfig.xml +1702 -0
- data/solr_sample_core/conf/spellings.txt +2 -0
- data/solr_sample_core/conf/stopwords.txt +14 -0
- data/solr_sample_core/conf/syn.txt +0 -0
- data/solr_sample_core/conf/synonyms.txt +29 -0
- data/solr_sample_core/conf/token_fixing_charfilter.txt +110 -0
- data/solr_sample_core/conf/update-script.js +53 -0
- data/solr_sample_core/conf/velocity/README.txt +101 -0
- data/solr_sample_core/conf/velocity/VM_global_library.vm +175 -0
- data/solr_sample_core/conf/velocity/browse.vm +33 -0
- data/solr_sample_core/conf/velocity/cluster.vm +19 -0
- data/solr_sample_core/conf/velocity/cluster_results.vm +31 -0
- data/solr_sample_core/conf/velocity/debug.vm +28 -0
- data/solr_sample_core/conf/velocity/did_you_mean.vm +9 -0
- data/solr_sample_core/conf/velocity/error.vm +11 -0
- data/solr_sample_core/conf/velocity/facet_fields.vm +23 -0
- data/solr_sample_core/conf/velocity/facet_pivot.vm +12 -0
- data/solr_sample_core/conf/velocity/facet_queries.vm +12 -0
- data/solr_sample_core/conf/velocity/facet_ranges.vm +23 -0
- data/solr_sample_core/conf/velocity/facets.vm +10 -0
- data/solr_sample_core/conf/velocity/footer.vm +43 -0
- data/solr_sample_core/conf/velocity/head.vm +35 -0
- data/solr_sample_core/conf/velocity/header.vm +7 -0
- data/solr_sample_core/conf/velocity/hit.vm +25 -0
- data/solr_sample_core/conf/velocity/hit_grouped.vm +43 -0
- data/solr_sample_core/conf/velocity/hit_plain.vm +25 -0
- data/solr_sample_core/conf/velocity/join_doc.vm +20 -0
- data/solr_sample_core/conf/velocity/jquery.autocomplete.css +48 -0
- data/solr_sample_core/conf/velocity/jquery.autocomplete.js +763 -0
- data/solr_sample_core/conf/velocity/layout.vm +24 -0
- data/solr_sample_core/conf/velocity/main.css +230 -0
- data/solr_sample_core/conf/velocity/mime_type_lists.vm +68 -0
- data/solr_sample_core/conf/velocity/pagination_bottom.vm +22 -0
- data/solr_sample_core/conf/velocity/pagination_top.vm +29 -0
- data/solr_sample_core/conf/velocity/product_doc.vm +32 -0
- data/solr_sample_core/conf/velocity/query.vm +42 -0
- data/solr_sample_core/conf/velocity/query_form.vm +64 -0
- data/solr_sample_core/conf/velocity/query_group.vm +43 -0
- data/solr_sample_core/conf/velocity/query_spatial.vm +75 -0
- data/solr_sample_core/conf/velocity/results_list.vm +22 -0
- data/solr_sample_core/conf/velocity/richtext_doc.vm +153 -0
- data/solr_sample_core/conf/velocity/suggest.vm +8 -0
- data/solr_sample_core/conf/velocity/tabs.vm +50 -0
- data/solr_sample_core/conf/xslt/example.xsl +132 -0
- data/solr_sample_core/conf/xslt/example_atom.xsl +67 -0
- data/solr_sample_core/conf/xslt/example_rss.xsl +66 -0
- data/solr_sample_core/conf/xslt/luke.xsl +337 -0
- data/solr_sample_core/conf/xslt/updateXml.xsl +70 -0
- data/spec/client_basics_spec.rb +26 -0
- data/spec/connect_spec.rb +25 -0
- data/spec/core_basics.rb +21 -0
- data/spec/index_spec.rb +31 -0
- data/spec/load_spec.rb +7 -0
- data/spec/minitest_helper.rb +36 -0
- data/spec/schema_spec.rb +113 -0
- metadata +284 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 6f960c75573490bad51ced465320ab2e824a15df
|
|
4
|
+
data.tar.gz: 0e70ff3edc2146651a56f578afd9a420120f85c3
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: f76f69c12364d15b6461c173309e3e614e073a7f1d7f04fa0a54f04a483a4a6647c4a0fb49eb9b9cc767386d209459aac39cf122db9c2262ed7c7eb6e541bff8
|
|
7
|
+
data.tar.gz: bec08b9cc97e2d9ac00fa9b34c10e78cc48ca0b9dcd788a5d967eab097bdba94f0eec34966b58f0e1c169930b83f5ba217ce61735711f1aa868381490a99eaa0
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Copyright (c) 2014 Bill Dueber
|
|
2
|
+
|
|
3
|
+
MIT License
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
6
|
+
a copy of this software and associated documentation files (the
|
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
11
|
+
the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be
|
|
14
|
+
included in all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
# SimpleSolrClient
|
|
2
|
+
|
|
3
|
+
[Note: still woefully incomplete, but in the spirit of "release early,
|
|
4
|
+
even if it's bad", here it is.]
|
|
5
|
+
|
|
6
|
+
A Solr client specifically designed to try to help you test what the heck
|
|
7
|
+
solr is actually doing.
|
|
8
|
+
|
|
9
|
+
Most useful when running on the same machine as the solr install, but
|
|
10
|
+
still useful even when you're not.
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## Motivation
|
|
14
|
+
|
|
15
|
+
Solr is complex.
|
|
16
|
+
|
|
17
|
+
It's complex enough, and fuddles with enough edge cases, that reading
|
|
18
|
+
the documentation and/or the code doesn't get me the understanding
|
|
19
|
+
that I feel I need.
|
|
20
|
+
|
|
21
|
+
If I were smarter, maybe I wouldn't need something like this.
|
|
22
|
+
|
|
23
|
+
I wanted a way to test what solr is actually doing, and
|
|
24
|
+
this library is a way for me to start to do that in a fashion that's
|
|
25
|
+
more convenient than doing everything "by hand" in the admin dashboard
|
|
26
|
+
or running queries via URLs in my browser or using curl.
|
|
27
|
+
|
|
28
|
+
I wanted a way to figure out what fields (of what types) are being created,
|
|
29
|
+
how things were being tokenized, etc., but all within the comfort of a test
|
|
30
|
+
suite that I could run against solr configurations to make sure things
|
|
31
|
+
weren't breaking when I made changes. I wanted to build up a structure around relevance
|
|
32
|
+
ranking tests (still coming, sadly) and quickly swap out different
|
|
33
|
+
configs to make sure it all works as I expect.
|
|
34
|
+
|
|
35
|
+
So: a simple solr library, with more exposure than most of what's out there
|
|
36
|
+
to the solr administration API and the introspection/analysis it affords.
|
|
37
|
+
|
|
38
|
+
# Features:
|
|
39
|
+
|
|
40
|
+
* Basic add/delete/query
|
|
41
|
+
* Commit/optimize/clear an index
|
|
42
|
+
* Reload a core after editing/adjusting a config file
|
|
43
|
+
* Inspect lists of fields, dynamicFields, copyFields, and
|
|
44
|
+
fieldTypes
|
|
45
|
+
* Determine which fields (and their properties) would be
|
|
46
|
+
created when a given field name is indexed, taking into
|
|
47
|
+
account dynamicField and copyField directives.
|
|
48
|
+
* Get list of the tokens that would be created if you
|
|
49
|
+
send a string to a particular fieldType (like in the
|
|
50
|
+
solr admin analysis page)
|
|
51
|
+
* Spit a modified schema object back out as xml for
|
|
52
|
+
saving somewhere if you'd like
|
|
53
|
+
|
|
54
|
+
Additional features when running against a localhost solr:
|
|
55
|
+
* Spin up a temporary core to play with
|
|
56
|
+
* Add/remove fields, dynamic_fields, copy_fields, and field types
|
|
57
|
+
on the fly and save them back, ready for a reload
|
|
58
|
+
* Create temporary cores for doing testing
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
## Basic add and delete of documents, and simple queries
|
|
63
|
+
|
|
64
|
+
Right now, it supports only the most basic add/delete/query operations.
|
|
65
|
+
Adding in support for more complex queries is on the TODO list, but took
|
|
66
|
+
a back seat to dealing with the schema.
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
```ruby
|
|
70
|
+
|
|
71
|
+
# A "client" points to a running solr, independent of the particular core
|
|
72
|
+
# You get a core from it.
|
|
73
|
+
|
|
74
|
+
client = SimpleSolrClient::Client.new('http://localhost:8983/solr')
|
|
75
|
+
core = client.core('core1') # must already exist!
|
|
76
|
+
core.url #=> "http://localhost:8983/solr/core1"
|
|
77
|
+
|
|
78
|
+
core.name #=> 'core1'
|
|
79
|
+
core.number_of_documents #=> 7, what was in there already
|
|
80
|
+
core.instance_dir #=> "/Users/dueberb/devel/java/solr/example/solr/collection1/"
|
|
81
|
+
core.schema_file #=> <path>/<to>/<schema.xml>
|
|
82
|
+
|
|
83
|
+
# Remove all the indexed documents and (automatically) commit
|
|
84
|
+
core.clear
|
|
85
|
+
|
|
86
|
+
# Add documents
|
|
87
|
+
#
|
|
88
|
+
# name_t is a text_general, multiValued, indexed, stored field
|
|
89
|
+
h1 = {:id => 'b', :name_t=>"Bill Dueber"}
|
|
90
|
+
h2 = {:id => 'd', :name_t=>"Danit Brown"}
|
|
91
|
+
h3 = {:id => 'z', :name_t=>"Ziv Brown Dueber"}
|
|
92
|
+
|
|
93
|
+
core.add_docs(h1)
|
|
94
|
+
|
|
95
|
+
core.number_of_documents #=> 0? But why? Oh, right...
|
|
96
|
+
core.commit
|
|
97
|
+
core.number_of_documents #=> 1 There we go
|
|
98
|
+
|
|
99
|
+
# You can chain many core operations
|
|
100
|
+
core.clear.add_docs([h1,h2, h3]).commit.optimize.number_of_documents #=> 3
|
|
101
|
+
|
|
102
|
+
# only the most basic querying is currently supported
|
|
103
|
+
# Result of a query is a QueryResponse, which contains a list of Document
|
|
104
|
+
# objects, which respond to ['fieldname']
|
|
105
|
+
|
|
106
|
+
# `core.all` brings back all documents up to the page limit
|
|
107
|
+
core.all.size #=> 3
|
|
108
|
+
core.all.map{|d| d['name_t']} #=> [['Bill Dueber'], ['Danit Brown'], ['Ziv Brown Dueber']]
|
|
109
|
+
|
|
110
|
+
# Simple field/value search
|
|
111
|
+
docs = core.fv_search(:name_t, 'Brown')
|
|
112
|
+
docs.class #=> SimpleSolrClient::Response::QueryResponse
|
|
113
|
+
|
|
114
|
+
docs.size #=> 2
|
|
115
|
+
docs.map{|d| d['name_t']} #=> [['Danit Brown'], ['Ziv Brown Dueber']]
|
|
116
|
+
|
|
117
|
+
# Special-case id/score as regular methods
|
|
118
|
+
docs.first.id #=> 'd'
|
|
119
|
+
docs.first.score #=> 0.625
|
|
120
|
+
|
|
121
|
+
# Figure out where documents fall. "Ziv Brown Dueber" contains both
|
|
122
|
+
# search terms, so should come first
|
|
123
|
+
docs = core.fv_search(:name_t, 'Brown Dueber')
|
|
124
|
+
docs.size #=> 3
|
|
125
|
+
|
|
126
|
+
docs.rank('z') #=> 1 (check by id)
|
|
127
|
+
docs.rank('z') < docs.rank('b') #=> true
|
|
128
|
+
|
|
129
|
+
# Of course, we can do it by score
|
|
130
|
+
docs.score('z') > docs.score('d')
|
|
131
|
+
|
|
132
|
+
# In addition to #clear, we can delete by simple query
|
|
133
|
+
core.delete('name_t:Dueber').commit.number_of_documents #=> 1
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## The `schema` object
|
|
139
|
+
|
|
140
|
+
Each core exposes a `schema` object that allows you to find out about
|
|
141
|
+
the fields, copyfields, and field types, and (on localhost) muck
|
|
142
|
+
with the system on the fly.
|
|
143
|
+
|
|
144
|
+
The schema object is initially created by using the admin api to
|
|
145
|
+
get lists of fields and field types, and the XML for the field types
|
|
146
|
+
is derived by parsing out the schema.xml returned by the api call. Solr
|
|
147
|
+
does *not* expand entities in the returned XML, so if you have `system`
|
|
148
|
+
entities (e.g., you're including stuff off of disk), SimpleSolrClient won't
|
|
149
|
+
get that text and things will likely blow up.
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
```ruby
|
|
153
|
+
|
|
154
|
+
# Get a list of cores
|
|
155
|
+
client.cores #=> ['core1']
|
|
156
|
+
core = client.core('core1')
|
|
157
|
+
|
|
158
|
+
# Get an object representing the schema.xml file
|
|
159
|
+
schema = core.schema #=> SimpleSolrClient::Schema object
|
|
160
|
+
|
|
161
|
+
# Get lists of field, dynamicFields, copyFields, and fieldTypes
|
|
162
|
+
# all as SimpleSolrClient::Schema::XXX objects
|
|
163
|
+
|
|
164
|
+
explicit_fields = schema.fields
|
|
165
|
+
dynamic_fields = schema.dynamic_fields
|
|
166
|
+
copy_fields = schema.copy_fields
|
|
167
|
+
field_types = schema.field_types
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Regular fields
|
|
172
|
+
|
|
173
|
+
Internally I call these "explicit_fields" as opposed to dynamic fields.
|
|
174
|
+
|
|
175
|
+
```
|
|
176
|
+
f = schema.field('id')
|
|
177
|
+
f.name #=> 'id'
|
|
178
|
+
f.type.name #=> 'string'
|
|
179
|
+
f.type.solr_class #=> 'solr.StrField'
|
|
180
|
+
|
|
181
|
+
# Basic attributes
|
|
182
|
+
# These will fall back on the fieldType if not defined for a
|
|
183
|
+
# particular field.
|
|
184
|
+
|
|
185
|
+
f.stored #=> true
|
|
186
|
+
f.indexed #=> true
|
|
187
|
+
f.multi #=> nil # defined on neither field 'id' nor fieldType 'string'
|
|
188
|
+
|
|
189
|
+
# We implement a matcher, which is just string equality
|
|
190
|
+
f.matches('id') #=> true
|
|
191
|
+
f.matches('id_t') #=>false
|
|
192
|
+
|
|
193
|
+
# You can add fields, and save it back if you're on
|
|
194
|
+
# localhost
|
|
195
|
+
|
|
196
|
+
schema.add_field Field.new(:name=>'format', :type_name=>'string', :multi=>true, :stored=>false, :indexed=>true)
|
|
197
|
+
|
|
198
|
+
schema.write; core.reload # only on localhost
|
|
199
|
+
|
|
200
|
+
core.schema.field('format').type.name #=> 'string'
|
|
201
|
+
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Dynamic fields
|
|
205
|
+
|
|
206
|
+
The rule Solr uses for dynamic fields is "longest one wins"
|
|
207
|
+
Right now, I'm only handling _leading_ asterisks, so `*_t` will
|
|
208
|
+
work, but `text_*` will not.
|
|
209
|
+
|
|
210
|
+
```
|
|
211
|
+
schema.dynamic_fields.size #=> 23
|
|
212
|
+
f = schema.dynamic_field('*_t') #=> SimpleSolrClient::Schema::DynamicField
|
|
213
|
+
f.name #=> '*_t'
|
|
214
|
+
f.type.name #=> 'text_general'
|
|
215
|
+
f.stored #=> true
|
|
216
|
+
f.matches('name_t') #=> true
|
|
217
|
+
f.matches('name_t_i') #=> false
|
|
218
|
+
f.matches('name') #=> false
|
|
219
|
+
|
|
220
|
+
# Dynamic Fields can also be added
|
|
221
|
+
schema.add_dynamic_field(:name=>"*_f", :type_name=>'float')
|
|
222
|
+
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Copy Fields
|
|
226
|
+
|
|
227
|
+
CopyFields are a different beast: they only have a source and a dest, and
|
|
228
|
+
they can have multiple targets. For that reason, the interface is slightly
|
|
229
|
+
different (`#copy_fields_for` instead of just `#copy_field`)
|
|
230
|
+
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
# <copyField source="*_t_s", dest="*_t"/>
|
|
234
|
+
# <copyField source="*_t_s", dest="*_s"/>
|
|
235
|
+
|
|
236
|
+
cfs = schema.copy_fields_for('*_t_s')
|
|
237
|
+
cfs.size #=> 2
|
|
238
|
+
cfs.map(&:dest) #=> ["*_t", "*_s"]
|
|
239
|
+
|
|
240
|
+
cf = SimpleSolrClient::Schema::CopyField.new('title', 'allfields')
|
|
241
|
+
cf.source #=> 'title'
|
|
242
|
+
cf.dest #=> 'allfields'
|
|
243
|
+
|
|
244
|
+
schema.add_copy_field(cf)
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
### Field Types
|
|
249
|
+
|
|
250
|
+
Field Types are created by getting data from the API and also
|
|
251
|
+
parsing XML out of the schema.xml (for later creating a new
|
|
252
|
+
schema.xml if you'd like).
|
|
253
|
+
|
|
254
|
+
You can also ask a field type how it would tokenize an input
|
|
255
|
+
string via indexing or querying.
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
FieldTypes _should_ be able to, say, report their XML serialization even
|
|
259
|
+
when outside of a particular schema object, but right now that doesn't
|
|
260
|
+
work. If you make changes to a field type, the only way to see the new
|
|
261
|
+
serialization is to call `schema.to_xml` on whichever schema you added
|
|
262
|
+
it to via `schema.add_field_type(ft)`
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
```ruby
|
|
267
|
+
|
|
268
|
+
schema.field_types.size #=> 23
|
|
269
|
+
ft = schema.field_type('text') #=> SimpleSolrClient::Schema::FieldType
|
|
270
|
+
ft.name #=> 'text'
|
|
271
|
+
ft.solr_class #=> 'solr.TextField'
|
|
272
|
+
ft.multi #=> true
|
|
273
|
+
ft.stored #=> true
|
|
274
|
+
ft.indexed #=> true
|
|
275
|
+
# etc.
|
|
276
|
+
|
|
277
|
+
newft = SimpleSolrClient::Schema::FieldType.new_from_xml(xmlstring)
|
|
278
|
+
schema.add_field_type(newft)
|
|
279
|
+
|
|
280
|
+
ft.name #=> text
|
|
281
|
+
ft.query_tokens "Don't forget me when I'm getting H20"
|
|
282
|
+
#=> ["don't", "forget", "me", "when", "i'm", ["getting", "get"], "h20"]
|
|
283
|
+
|
|
284
|
+
ft.index_tokens 'When it rains, it pours'
|
|
285
|
+
#=> ["when", "it", ["rains", "rain"], "it", ["pours", "pour"]]
|
|
286
|
+
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
## What will I get if I index a field named `str`?
|
|
291
|
+
|
|
292
|
+
Dynamic- and copy-fields are very convenient, but they can make it hard to
|
|
293
|
+
figure out what you're actually going to get in your indexed and
|
|
294
|
+
stored fields. I started thinking about this [at the end of this blog post](http://robotlibrarian.billdueber.com/2014/10/schemaless-solr-with-dynamicfield-and-copyfield/)
|
|
295
|
+
|
|
296
|
+
`schema.resulting_fields(str)` will take the field name given and
|
|
297
|
+
figure out what fields would be generated, returning an array of field
|
|
298
|
+
objects (which are created wholesale if need be due to dynamicFields or
|
|
299
|
+
copyFields).
|
|
300
|
+
|
|
301
|
+
```ruby
|
|
302
|
+
rs = schema.resulting_fields('name_t_s')
|
|
303
|
+
rs.size #=> 3
|
|
304
|
+
|
|
305
|
+
rs.map{|f| [f.name, f.type.name]}
|
|
306
|
+
#=> [["name_t_s", "ignored"], ["name_t", "text"], ["name", "string"]]
|
|
307
|
+
|
|
308
|
+
rs.find_all{|f| f.stored}.map(&:name) #=> ["name"]
|
|
309
|
+
rs.find_all{|f| f.indexed}.map(&:name) #=> ['name_t']
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
## Saving/reloading a changed schema
|
|
317
|
+
|
|
318
|
+
Whether you change a solr install via editing a text file or
|
|
319
|
+
by using `schema.write`, you can always reload a core.
|
|
320
|
+
|
|
321
|
+
```ruby
|
|
322
|
+
core.reload
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
If you're working on localhost, you can make programmatic changes
|
|
326
|
+
to the schema and then ask for a write/reload cycle. It uses the API
|
|
327
|
+
to find the path to the schema.xml file and overwrites it.
|
|
328
|
+
|
|
329
|
+
```ruby
|
|
330
|
+
|
|
331
|
+
schema = core.schema
|
|
332
|
+
schema.add_field Field.new(:name=>'price', :type_name=>'float')
|
|
333
|
+
schema.write
|
|
334
|
+
schema = core.reload.schema
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
## Installation
|
|
339
|
+
|
|
340
|
+
$ gem install simple_solr_client
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
## Contributing
|
|
344
|
+
|
|
345
|
+
1. Fork it ( https://github.com/billdueber/simple_solr/fork )
|
|
346
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
347
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
348
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
|
349
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/simple_solr.rb
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Top-level namespace: Lucene query-escaping helpers plus the location of the
# sample core configuration that ships with the gem.
module SimpleSolrClient

  # Characters that carry special meaning in Lucene query syntax.
  ESCAPE_CHARS = '+-&|!(){}[]^"~*?:\\'.freeze

  # Maps each special character to its backslash-escaped form.
  ESCAPE_MAP = ESCAPE_CHARS.each_char.each_with_object({}) { |char, map| map[char] = "\\" + char }.freeze

  # Character class matching any one of the special characters.
  ESCAPE_PAT = Regexp.new('[' + Regexp.quote(ESCAPE_CHARS) + ']')

  # Escape those characters that need escaping to be valid lucene syntax.
  # Is *not* called internally, since how am I supposed to know if the
  # parens/quotes are a part of your string or there for legal lucene grouping?
  #
  # @param [#to_s] str The value to escape (converted with #to_s first)
  # @return [String] a copy with every special character prefixed by a backslash
  def self.lucene_escape(str)
    str.to_s.gsub(ESCAPE_PAT, ESCAPE_MAP)
  end


  # Where is the sample core configuration?
  SAMPLE_CORE_DIR = File.absolute_path File.join(File.dirname(__FILE__), '..', 'solr_sample_core')

end
|
|
20
|
+
|
|
21
|
+
require 'httpclient'
|
|
22
|
+
require 'forwardable'
|
|
23
|
+
|
|
24
|
+
# Choose a JSON-compatible json parser/producer
|
|
25
|
+
if defined? JRUBY_VERSION
|
|
26
|
+
require 'json'
|
|
27
|
+
else
|
|
28
|
+
require 'oj'
|
|
29
|
+
Oj.mimic_JSON
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
require "simple_solr/version"
|
|
36
|
+
|
|
37
|
+
# Need to load core before client because of inter-dependencies resulting
|
|
38
|
+
# in 'require' recursion
|
|
39
|
+
|
|
40
|
+
require 'simple_solr/core'
|
|
41
|
+
require 'simple_solr/client'
|
|
42
|
+
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
require 'fileutils'
require 'httpclient'
require 'securerandom'
require 'tmpdir'

require 'simple_solr/response/generic_response'
require 'simple_solr/core'
|
|
6
|
+
|
|
7
|
+
module SimpleSolrClient
|
|
8
|
+
|
|
9
|
+
# A Client talks to the Solr instance; use a SimpleSolrClient::Core to talk to a
|
|
10
|
+
# particular core.
|
|
11
|
+
|
|
12
|
+
# Talks HTTP to a running Solr instance and hands out Core objects for
# individual cores.
class Client

  attr_reader :base_url, :rawclient

  # @param [String] url Base URL of the solr instance; one trailing '/' is dropped
  def initialize(url)
    @base_url   = url.chomp('/')
    @client_url = @base_url
    @rawclient  = HTTPClient.new
  end

  # Construct a URL for the given arguments that hit the configured solr
  # @return [String] the new url, based on the base_url and the passed args
  def url(*args)
    [@base_url, *args].join('/').chomp('/')
  end

  # Sometimes, you just gotta have a top_level_url (as opposed to a
  # core-level URL). Built from @client_url, which #initialize sets to the
  # same value as @base_url.
  # @return [String] the new url, based on the client url and the passed args
  def top_level_url(*args)
    [@client_url, *args].join('/').chomp('/')
  end


  # Call a get on the underlying http client and return the content.
  # You can pass in :force_top_level_url=>true for those cases when
  # you absolutely have to use the client-level url and not a
  # core level URL. That key is stripped before the request is made;
  # the caller's hash is left untouched.
  # @param [String] path The parts of the URL that come after the base
  # @param [Hash] args The url arguments
  # @return [Object] whatever the http client's response returns from #content
  def raw_get_content(path, args={})
    params = args.dup
    target = params.delete(:force_top_level_url) ? top_level_url(path) : url(path)
    @rawclient.get(target, params).content
  end

  # A basic get to the instance (not any specific core)
  # Neither `path` nor `args` is modified, so frozen values are fine.
  # @param [String] path The parts of the URL that comes after the core
  # @param [Hash] args The url arguments
  # @return [Hash] the parsed-out response
  # @raise [RuntimeError] if the parsed response has an 'error' entry
  def _get(path, args={})
    relative_path = path.sub(%r{\A/}, '')
    params = args.merge('wt' => 'json')   # always ask solr for JSON
    res = JSON.parse(raw_get_content(relative_path, params))
    raise RuntimeError, res['error'].to_s if res['error']
    res
  end

  # post JSON data. Unlike #_get, the response is not inspected for an
  # 'error' entry.
  # @param [String] path The parts of the URL that comes after the core
  # @param [Hash,Array] object_to_post The data to post as json
  # @return [Hash] the parsed-out response
  def _post_json(path, object_to_post)
    resp = @rawclient.post(url(path), JSON.dump(object_to_post), {'Content-type' => 'application/json'})
    JSON.parse(resp.content)
  end

  # Get from solr, and return a Response object of some sort
  # @param [String] path The parts of the URL that comes after the core
  # @param [Hash] args The url arguments
  # @param [Class, nil] response_type Class used to wrap the parsed response;
  #   defaults to SimpleSolrClient::Response::GenericResponse
  # @return [SimpleSolrClient::Response, response_type]
  def get(path, args = {}, response_type = nil)
    response_type ||= SimpleSolrClient::Response::GenericResponse
    response_type.new(_get(path, args))
  end

  # Post an object as JSON and return a Response object
  # @param [String] path The parts of the URL that comes after the core
  # @param [Hash,Array] object_to_post The data to post as json
  # @param [Class, nil] response_type Class used to wrap the parsed response;
  #   defaults to SimpleSolrClient::Response::GenericResponse
  # @return [SimpleSolrClient::Response, response_type]
  def post_json(path, object_to_post, response_type = nil)
    response_type ||= SimpleSolrClient::Response::GenericResponse
    response_type.new(_post_json(path, object_to_post))
  end


  # Get a client specific to the given core
  # @param [String] corename The name of the core (which must already exist!)
  # @return [SimpleSolrClient::Core]
  def core(corename)
    SimpleSolrClient::Core.new(@base_url, corename)
  end


  # List the keys of the `status` section returned by admin/cores.
  # @return [Array] the core names
  def cores
    get('admin/cores', {:force_top_level_url=>true}).status.keys
  end


  # Create a new, temporary core: copy the sample core configuration into a
  # fresh temp directory and send a CREATE action to admin/cores.
  # @param [String] corename Name for the new core
  # @return [SimpleSolrClient::Core] a Core for the given name
  #noinspection RubyWrongHash
  def new_core(corename)
    dir = temp_core_dir_setup(corename)

    args = {
        :wt          => 'json',
        :action      => 'CREATE',
        :name        => corename,
        :instanceDir => dir
    }

    get('admin/cores', args)
    core(corename)
  end

  # Create a new core whose name is 'sstemp_' plus a random UUID.
  # @return [SimpleSolrClient::Core]
  def temp_core
    new_core('sstemp_' + SecureRandom.uuid)
  end

  # Set up files for a temp core: make a temp directory and copy the contents
  # of SAMPLE_CORE_DIR into it. Needs 'tmpdir' and 'fileutils' to be loaded.
  # @param [String] corename Used as part of the temp directory's name
  # @return [String] path of the new directory
  def temp_core_dir_setup(corename)
    dest = Dir.mktmpdir("simple_solr_#{corename}")
    # Copying "<src>/." copies the directory's contents, not the directory itself
    FileUtils.cp_r File.join(SAMPLE_CORE_DIR, '.'), dest
    dest
  end

  # Unload all cores whose name includes 'sstemp'
  def unload_temp_cores
    cores.each do |name|
      core(name).unload if name =~ /sstemp/
    end
  end

end
|
|
138
|
+
|
|
139
|
+
end
|