slingshot-rb 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -5,3 +5,4 @@ pkg/*
5
5
  rdoc/
6
6
  coverage/
7
7
  scratch/
8
+ examples/*.html
data/Rakefile CHANGED
@@ -50,3 +50,25 @@ rescue LoadError
50
50
  abort "RCov is not available. In order to run rcov, you must: sudo gem install rcov"
51
51
  end
52
52
  end
53
+
54
+ namespace :web do
55
+
56
+ desc "Update the Github website"
57
+ task :update => :generate do
58
+ current_branch = `git branch --no-color`.split("\n").select { |line| line =~ /^\* / }.to_s.gsub(/\* (.*)/, '\1')
59
+ (puts "Unable to determine current branch"; exit(1) ) unless current_branch
60
+ system "git stash save && git checkout web"
61
+ system "cp examples/slingshot-dsl.html index.html"
62
+ system "git add index.html && git co -m 'Updated Slingshot website'"
63
+ system "git push origin web:gh-pages -f"
64
+ system "git checkout #{current_branch} && git stash pop"
65
+ end
66
+
67
+ desc "Generate the Rocco documentation page"
68
+ task :generate do
69
+ system "rocco examples/slingshot-dsl.rb"
70
+ html = File.read('examples/slingshot-dsl.html').gsub!(/slingshot\-dsl\.rb/, 'slingshot.rb')
71
+ File.open('examples/slingshot-dsl.html', 'w') { |f| f.write html }
72
+ system "open examples/slingshot-dsl.html"
73
+ end
74
+ end
@@ -0,0 +1,378 @@
1
+ # **Slingshot** is a rich and comfortable Ruby API and DSL for the
2
+ # [_ElasticSearch_](http://www.elasticsearch.org/) search engine/database.
3
+ #
4
+ # _ElasticSearch_ is a scalable, distributed, highly-available,
5
+ # RESTful database communicating by JSON over HTTP, based on [Lucene](http://lucene.apache.org/),
6
+ # written in Java. It manages to be very simple and very powerful at the same time.
7
+ #
8
+ # By following these instructions you should have the search running
9
+ # on a sane operating system in less than 10 minutes.
10
+
11
+ #### Installation
12
+
13
+ # Install Slingshot with Rubygems.
14
+ #
15
+ # gem install slingshot-rb
16
+ #
17
+ require 'rubygems'
18
+ require 'slingshot'
19
+
20
+ #### Prerequisites
21
+
22
+ # You'll need a working and running _ElasticSearch_ server. Thankfully, that's easy.
23
+ ( puts <<-"INSTALL" ; exit(1) ) unless RestClient.get('http://localhost:9200') rescue false
24
+ [!] You don’t appear to have ElasticSearch installed. Please install and launch it with the following commands.
25
+ curl -k -L -o elasticsearch-0.15.0.tar.gz http://github.com/downloads/elasticsearch/elasticsearch/elasticsearch-0.15.0.tar.gz
26
+ tar -zxvf elasticsearch-0.15.0.tar.gz
27
+ ./elasticsearch-0.15.0/bin/elasticsearch -f
28
+ INSTALL
29
+
30
+ ### Simple Usage
31
+
32
+ #### Storing and indexing documents
33
+
34
+ # Let's initialize an index named “articles”.
35
+ Slingshot.index 'articles' do
36
+ # To make sure it's fresh, let's delete any existing index with the same name.
37
+ delete
38
+ # And then, let's create it.
39
+ create
40
+
41
+ # We want to store and index some articles with title and tags. Simple Hashes are OK.
42
+ store :title => 'One', :tags => ['ruby'], :published_on => '2011-01-01'
43
+ store :title => 'Two', :tags => ['ruby', 'python'], :published_on => '2011-01-02'
44
+ store :title => 'Three', :tags => ['java'], :published_on => '2011-01-02'
45
+ store :title => 'Four', :tags => ['ruby', 'php'], :published_on => '2011-01-03'
46
+
47
+ # We force refreshing the index, so we can query it immediately.
48
+ refresh
49
+ end
50
+
51
+ # We may want to define a specific [mapping](http://www.elasticsearch.org/guide/reference/api/admin-indices-create-index.html)
52
+ # for the index.
53
+
54
+ Slingshot.index 'articles' do
55
+ # To do so, just pass a Hash containing the specified mapping to the `Index#create` method.
56
+ create :mappings => {
57
+ # Specify for which type of documents this mapping should be used (`article` in this case).
58
+ :article => {
59
+ :properties => {
60
+ # Specify the type of the field, whether it should be analyzed, etc.
61
+ :id => { :type => 'string', :index => 'not_analyzed', :include_in_all => false },
62
+ # Set the boost or analyzer settings for the field, et cetera. The _ElasticSearch_ guide
63
+ # has [more information](http://elasticsearch.org/guide/reference/mapping/index.html).
64
+ :title => { :type => 'string', :boost => 2.0, :analyzer => 'snowball' },
65
+ :tags => { :type => 'string', :analyzer => 'keyword' },
66
+ :content => { :type => 'string', :analyzer => 'snowball' }
67
+ }
68
+ }
69
+ }
70
+ end
71
+
72
+
73
+
74
+ #### Searching
75
+
76
+ # With the documents indexed and stored in the _ElasticSearch_ database, we want to search for them.
77
+ #
78
+ # Slingshot exposes the search interface via a simple domain-specific language.
79
+
80
+
81
+ ##### Simple Query String Searches
82
+
83
+ # We can do simple searches, like searching for articles containing “One” in their title.
84
+ s = Slingshot.search('articles') do
85
+ query do
86
+ string "title:One"
87
+ end
88
+ end
89
+
90
+ # The results:
91
+ # * One [tags: ruby]
92
+ s.results.each do |document|
93
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
94
+ end
95
+
96
+ # Of course, we may write the blocks in shorter notation.
97
+
98
+ # Let's search for articles whose titles begin with letter “T”.
99
+ s = Slingshot.search('articles') { query { string "title:T*" } }
100
+
101
+ # The results:
102
+ # * Two [tags: ruby, python]
103
+ # * Three [tags: java]
104
+ s.results.each do |document|
105
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
106
+ end
107
+
108
+ # We can use any valid [Lucene query syntax](http://lucene.apache.org/java/3_0_3/queryparsersyntax.html)
109
+ # for the query string queries.
110
+
111
+ # For debugging, we can display the JSON which is being sent to _ElasticSearch_.
112
+ #
113
+ # {"query":{"query_string":{"query":"title:T*"}}}
114
+ #
115
+ puts "", "Query:", "-"*80
116
+ puts s.to_json
117
+
118
+ # Or better, we may display a complete `curl` command, so we can execute it in terminal
119
+ # to see the raw output, tweak params and debug any problems.
120
+ #
121
+ # curl -X POST "http://localhost:9200/articles/_search?pretty=true" \
122
+ # -d '{"query":{"query_string":{"query":"title:T*"}}}'
123
+ #
124
+ puts "", "Try the query in Curl:", "-"*80
125
+ puts s.to_curl
126
+
127
+
128
+ ##### Other Types of Queries
129
+
130
+ # Of course, we may want to define our queries more expressively, for instance
131
+ # when we're searching for articles with specific _tags_.
132
+
133
+ # Let's suppose we want to search for articles tagged “ruby” _or_ “python”.
134
+ # That's a great excuse to use a [_terms_](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
135
+ # query.
136
+ s = Slingshot.search('articles') do
137
+ query do
138
+ terms :tags, ['ruby', 'python']
139
+ end
140
+ end
141
+
142
+ # The search, as expected, returns three articles, all tagged “ruby” — among other tags:
143
+ #
144
+ # * Two [tags: ruby, python]
145
+ # * One [tags: ruby]
146
+ # * Four [tags: ruby, php]
147
+ s.results.each do |document|
148
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
149
+ end
150
+
151
+ # What if we wanted to search for articles tagged both “ruby” _and_ “python”?
152
+ # That's a great excuse to specify `minimum_match` for the query.
153
+ s = Slingshot.search('articles') do
154
+ query do
155
+ terms :tags, ['ruby', 'python'], :minimum_match => 2
156
+ end
157
+ end
158
+
159
+ # The search, as expected, returns one article, tagged with _both_ “ruby” and “python”:
160
+ #
161
+ # * Two [tags: ruby, python]
162
+ s.results.each do |document|
163
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
164
+ end
165
+
166
+ # _ElasticSearch_ allows us to do many more types of queries.
167
+ # Eventually, _Slingshot_ will support all of them.
168
+ # So far, only these are supported:
169
+ #
170
+ # * [term](http://elasticsearch.org/guide/reference/query-dsl/term-query.html)
171
+ # * [terms](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
172
+
173
+ ##### Faceted Search
174
+
175
+ # _ElasticSearch_ makes it trivial to retrieve complex aggregated data from our index/database,
176
+ # so-called [_facets_](http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Faceted-Search-Solr).
177
+
178
+ # Let's say we want to display article counts for every tag in the database.
179
+ # For that, we'll use a _terms_ facet.
180
+
181
+ #
182
+ s = Slingshot.search 'articles' do
183
+ # We will search for articles whose title begins with letter “T”,
184
+ query { string 'title:T*' }
185
+
186
+ # and retrieve their counts “bucketed” by their `tags`.
187
+ facet 'tags' do
188
+ terms :tags
189
+ end
190
+ end
191
+
192
+ # As we see, our query has found two articles, and if you recall our articles from above,
193
+ # _Two_ is tagged with “ruby” and “python”, _Three_ is tagged with “java”. So the counts
194
+ # won't surprise us:
195
+ # Found 2 articles: Three, Two
196
+ # Counts:
197
+ # -------
198
+ # ruby 1
199
+ # python 1
200
+ # java 1
201
+ puts "Found #{s.results.count} articles: #{s.results.map(&:title).join(', ')}"
202
+ puts "Counts based on tags:", "-"*25
203
+ s.results.facets['tags']['terms'].each do |f|
204
+ puts "#{f['term'].ljust(10)} #{f['count']}"
205
+ end
206
+
207
+ # These counts are based on the scope of our current query (called `main` in _ElasticSearch_).
208
+ # What if we wanted to display aggregated counts by `tags` across the whole database?
209
+
210
+ #
211
+ s = Slingshot.search 'articles' do
212
+ query { string 'title:T*' }
213
+
214
+ facet 'global-tags' do
215
+ # That's where the `global` scope for a facet comes in.
216
+ terms :tags, :global => true
217
+ end
218
+
219
+ # As you can see, we can even combine facets scoped
220
+ # to the current query with global facets.
221
+ facet 'current-tags' do
222
+ terms :tags
223
+ end
224
+ end
225
+
226
+ # Aggregated results for the current query are the same as previously:
227
+ # Current query facets:
228
+ # -------------------------
229
+ # ruby 1
230
+ # python 1
231
+ # java 1
232
+ puts "Current query facets:", "-"*25
233
+ s.results.facets['current-tags']['terms'].each do |f|
234
+ puts "#{f['term'].ljust(10)} #{f['count']}"
235
+ end
236
+
237
+ # As we see, aggregated results for the global scope also include
238
+ # tags for articles not matched by the query, such as “java” or “php”:
239
+ # Global facets:
240
+ # -------------------------
241
+ # ruby 3
242
+ # python 1
243
+ # php 1
244
+ # java 1
245
+ puts "Global facets:", "-"*25
246
+ s.results.facets['global-tags']['terms'].each do |f|
247
+ puts "#{f['term'].ljust(10)} #{f['count']}"
248
+ end
249
+
250
+ # The real power of facets lies in their combination with
251
+ # [filters](http://elasticsearch.karmi.cz/guide/reference/api/search/filter.html),
252
+ # though:
253
+
254
+ # > When doing things like facet navigation,
255
+ # > sometimes only the hits are needed to be filtered by the chosen facet,
256
+ # > and all the facets should continue to be calculated based on the original query.
257
+
258
+
259
+ ##### Filtered Search
260
+
261
+ # So, let's make our search a bit more complex. Let's search for articles whose titles begin
262
+ # with letter “T”, again, but filter the results, so only the articles tagged “ruby”
263
+ # are returned.
264
+ s = Slingshot.search 'articles' do
265
+
266
+ # We use the same **query** as before.
267
+ query { string 'title:T*' }
268
+
269
+ # And add a _terms_ **filter** based on tags.
270
+ filter :terms, :tags => ['ruby']
271
+
272
+ # And, of course, our facet definition.
273
+ facet('tags') { terms :tags }
274
+
275
+ end
276
+
277
+ # We see that only the article _Two_ (tagged “ruby” and “python”) was returned,
278
+ # _not_ the article _Three_ (tagged “java”):
279
+ #
280
+ # * Two [tags: ruby, python]
281
+ s.results.each do |document|
282
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
283
+ end
284
+
285
+ # However, the count for article _Three_'s tag, “java”, _is_ in fact included in the facets:
286
+ #
287
+ # Counts based on tags:
288
+ # -------------------------
289
+ # ruby 1
290
+ # python 1
291
+ # java 1
292
+ puts "Counts based on tags:", "-"*25
293
+ s.results.facets['tags']['terms'].each do |f|
294
+ puts "#{f['term'].ljust(10)} #{f['count']}"
295
+ end
296
+
297
+
298
+ ##### Sorting
299
+
300
+ # By default, the results are sorted according to their relevancy
301
+ # (available as the `_score` property).
302
+
303
+ # But, what if we want to sort the results based on some other criteria,
304
+ # such as published date, price, etc? We can do that.
305
+ s = Slingshot.search 'articles' do
306
+ # We search for articles tagged “ruby”
307
+ query { string 'tags:ruby' }
308
+
309
+ # And sort them by their `title`, in descending order.
310
+ sort { title 'desc' }
311
+ end
312
+
313
+ # The results:
314
+ # * Two
315
+ # * One
316
+ # * Four
317
+ s.results.each do |document|
318
+ puts "* #{ document.title }"
319
+ end
320
+
321
+ # Of course, it's possible to combine more fields in the sorting definition.
322
+
323
+ s = Slingshot.search 'articles' do
324
+ # We will just get all articles for this case.
325
+ query { string '*' }
326
+
327
+ sort do
328
+ # We will sort the results by their `published_on` property in ascending (default) order,
329
+ published_on
330
+ # and by their `title` property, in descending order.
331
+ title 'desc'
332
+ end
333
+ end
334
+
335
+ # The results:
336
+ # * One (Published on: 2011-01-01)
337
+ # * Two (Published on: 2011-01-02)
338
+ # * Three (Published on: 2011-01-02)
339
+ # * Four (Published on: 2011-01-03)
340
+ s.results.each do |document|
341
+ puts "* #{ document.title.ljust(10) } (Published on: #{ document.published_on })"
342
+ end
343
+
344
+ ##### Highlighting
345
+
346
+ # Often, you want to highlight the matched snippets in your text.
347
+ # _ElasticSearch_ provides many features for
348
+ # [highlighting](http://www.elasticsearch.org/guide/reference/api/search/highlighting.html).
349
+ #
350
+ s = Slingshot.search 'articles' do
351
+ # Let's search for documents containing “Two” in their titles.
352
+ query { string 'title:Two' }
353
+
354
+ # And use the `highlight` method.
355
+ highlight :title
356
+ end
357
+
358
+ # The results:
359
+ # Title: Two, highlighted title: <em>Two</em>
360
+ s.results.each do |document|
361
+ puts "Title: #{ document.title }, highlighted title: #{document.highlight.title}"
362
+ end
363
+
364
+ # Slingshot allows you to specify options for the highlighting, such as:
365
+ #
366
+ s = Slingshot.search 'articles' do
367
+ query { string 'title:Two' }
368
+
369
+ # • specifying the fields to highlight
370
+ highlight :title, :body
371
+
372
+ # • specifying their options
373
+ highlight :title, :body => { :number_of_fragments => 0 }
374
+
375
+ # • or specifying highlighting options, such as the wrapper tag
376
+ highlight :title, :body, :options => { :tag => '<strong class="highlight">' }
377
+ end
378
+
data/lib/slingshot.rb CHANGED
@@ -2,6 +2,7 @@ require 'rest_client'
2
2
  require 'yajl/json_gem'
3
3
 
4
4
  require 'slingshot/rubyext/hash'
5
+ require 'slingshot/logger'
5
6
  require 'slingshot/configuration'
6
7
  require 'slingshot/client'
7
8
  require 'slingshot/client'
@@ -10,6 +11,7 @@ require 'slingshot/search/query'
10
11
  require 'slingshot/search/sort'
11
12
  require 'slingshot/search/facet'
12
13
  require 'slingshot/search/filter'
14
+ require 'slingshot/search/highlight'
13
15
  require 'slingshot/results/collection'
14
16
  require 'slingshot/results/item'
15
17
  require 'slingshot/index'
@@ -14,6 +14,11 @@ module Slingshot
14
14
  @wrapper = klass || @wrapper || Results::Item
15
15
  end
16
16
 
17
+ def self.logger(device=nil, options={})
18
+ return @logger = Logger.new(device, options) if device
19
+ @logger || nil
20
+ end
21
+
17
22
  def self.reset(*properties)
18
23
  reset_variables = properties.empty? ? instance_variables : instance_variables & properties.map { |p| "@#{p}" }
19
24
  reset_variables.each { |v| instance_variable_set(v, nil) }
@@ -0,0 +1,61 @@
1
+ module Slingshot
2
+ class Logger
3
+
4
+ def initialize(device, options={})
5
+ @device = if device.respond_to?(:write)
6
+ device
7
+ else
8
+ File.open(device, 'a')
9
+ end
10
+ @device.sync = true
11
+ @options = options
12
+ at_exit { @device.close unless @device.closed? }
13
+ end
14
+
15
+ def level
16
+ @options[:level] || 'info'
17
+ end
18
+
19
+ def write(message)
20
+ @device.write message
21
+ end
22
+
23
+ def log_request(endpoint, params=nil, curl='')
24
+ # [_search] (articles,users) 2001-02-12 18:20:42:32
25
+ #
26
+ # curl -X POST ....
27
+ #
28
+ content = "# [#{endpoint}] "
29
+ content += "(#{params.inspect}) " if params
30
+ content += time
31
+ content += "\n#\n"
32
+ content += curl
33
+ content += "\n\n"
34
+ write content
35
+ end
36
+
37
+ def log_response(status, json)
38
+ # [200 OK] (4 msec) Sat Feb 12 19:20:47 2011
39
+ #
40
+ # {
41
+ # "took" : 4,
42
+ # "hits" : [...]
43
+ # ...
44
+ # }
45
+ #
46
+ took = JSON.parse(json)['took'] rescue nil
47
+ content = "# [#{status}] "
48
+ content += "(#{took} msec) " if took
49
+ content += time
50
+ content += "\n#\n"
51
+ json.each_line { |line| content += "# #{line}" }
52
+ content += "\n\n"
53
+ write content
54
+ end
55
+
56
+ def time
57
+ Time.now.strftime('%Y-%m-%d %H:%M:%S:%L')
58
+ end
59
+
60
+ end
61
+ end
@@ -13,6 +13,7 @@ module Slingshot
13
13
  h
14
14
  else
15
15
  document = h['_source'] ? h['_source'] : h['fields']
16
+ document['highlight'] = h['highlight'] if h['highlight']
16
17
  h.update document if document
17
18
  Configuration.wrapper.new(h)
18
19
  end
@@ -3,7 +3,7 @@ module Slingshot
3
3
 
4
4
  class Search
5
5
 
6
- attr_reader :indices, :url, :results, :response, :query, :facets, :filters
6
+ attr_reader :indices, :url, :results, :response, :json, :query, :facets, :filters
7
7
 
8
8
  def initialize(*indices, &block)
9
9
  @options = indices.pop if indices.last.is_a?(Hash)
@@ -37,6 +37,15 @@ module Slingshot
37
37
  self
38
38
  end
39
39
 
40
+ def highlight(*args)
41
+ unless args.empty?
42
+ @highlight = Highlight.new(*args)
43
+ self
44
+ else
45
+ @highlight
46
+ end
47
+ end
48
+
40
49
  def from(value)
41
50
  @from = value
42
51
  self
@@ -53,17 +62,33 @@ module Slingshot
53
62
  end
54
63
 
55
64
  def perform
56
- @url = "#{Configuration.url}/#{indices.join(',')}/_search"
57
- @response = JSON.parse( Configuration.client.post(@url, self.to_json) )
58
- @results = Results::Collection.new(@response)
65
+ @url = "#{Configuration.url}/#{indices.join(',')}/_search"
66
+ @response = Configuration.client.post(@url, self.to_json)
67
+ @json = Yajl::Parser.parse(@response)
68
+ @results = Results::Collection.new(@json)
59
69
  self
60
- rescue Exception
61
- STDERR.puts "Request failed: \n#{self.to_curl}"
70
+ rescue Exception => e
71
+ STDERR.puts "[REQUEST FAILED]\n#{self.to_curl}\n"
62
72
  raise
73
+ ensure
74
+ if Configuration.logger
75
+ Configuration.logger.log_request '_search', indices, to_curl
76
+ if Configuration.logger.level == 'debug'
77
+ # FIXME: Depends on RestClient implementation
78
+ if @response
79
+ code = @response.code
80
+ body = Yajl::Encoder.encode(@json, :pretty => true)
81
+ else
82
+ code = e.message
83
+ body = e.http_body
84
+ end
85
+ Configuration.logger.log_response code, body
86
+ end
87
+ end
63
88
  end
64
89
 
65
90
  def to_curl
66
- %Q|curl -X POST "http://localhost:9200/#{indices}/_search?pretty=true" -d '#{self.to_json}'|
91
+ %Q|curl -X POST "#{Configuration.url}/#{indices}/_search?pretty=true" -d '#{self.to_json}'|
67
92
  end
68
93
 
69
94
  def to_json
@@ -72,6 +97,7 @@ module Slingshot
72
97
  request.update( { :sort => @sort } ) if @sort
73
98
  request.update( { :facets => @facets } ) if @facets
74
99
  @filters.each { |filter| request.update( { :filter => filter } ) } if @filters
100
+ request.update( { :highlight => @highlight } ) if @highlight
75
101
  request.update( { :size => @size } ) if @size
76
102
  request.update( { :from => @from } ) if @from
77
103
  request.update( { :fields => @fields } ) if @fields
@@ -3,7 +3,7 @@ module Slingshot
3
3
 
4
4
  #--
5
5
  # TODO: Implement all elastic search facets (geo, histogram, range, etc)
6
- # https://github.com/elasticsearch/elasticsearch/wiki/Search-API-Facets
6
+ # http://elasticsearch.org/guide/reference/api/search/facets/
7
7
  #++
8
8
 
9
9
  class Facet
@@ -14,8 +14,13 @@ module Slingshot
14
14
  self.instance_eval(&block) if block_given?
15
15
  end
16
16
 
17
- def terms(field, options={})
18
- @value = { :terms => { :field => field } }.update(options)
17
+ def terms(field, size=10, options={})
18
+ @value = { :terms => { :field => field, :size => size } }.update(options)
19
+ self
20
+ end
21
+
22
+ def date(field, interval='day', options={})
23
+ @value = { :date_histogram => { :field => field, :interval => interval } }.update(options)
19
24
  self
20
25
  end
21
26
 
@@ -0,0 +1,39 @@
1
+ module Slingshot
2
+ module Search
3
+
4
+ # http://www.elasticsearch.org/guide/reference/api/search/highlighting.html
5
+ #
6
+ class Highlight
7
+
8
+ def initialize(*args)
9
+ @options = (args.last.is_a?(Hash) && args.last.delete(:options)) || {}
10
+ extract_highlight_tags
11
+ @fields = args.inject({}) do |result, field|
12
+ field.is_a?(Hash) ? result.update(field) : result[field.to_sym] = {}; result
13
+ end
14
+ end
15
+
16
+ def to_json
17
+ to_hash.to_json
18
+ end
19
+
20
+ def to_hash
21
+ h = { :fields => @fields }
22
+ h.update @options
23
+ return h
24
+ end
25
+
26
+ private
27
+
28
+ def extract_highlight_tags
29
+ if tag = @options.delete(:tag)
30
+ @options.update \
31
+ :pre_tags => [tag],
32
+ :post_tags => [tag.to_s.gsub(/^<([a-z]+).*/, '</\1>')]
33
+ end
34
+ end
35
+
36
+ end
37
+
38
+ end
39
+ end
@@ -23,6 +23,11 @@ module Slingshot
23
23
  @value
24
24
  end
25
25
 
26
+ def all
27
+ @value = { :match_all => {} }
28
+ @value
29
+ end
30
+
26
31
  def to_json
27
32
  @value.to_json
28
33
  end
@@ -1,3 +1,3 @@
1
1
  module Slingshot
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -1 +1 @@
1
- {"title" : "One", "tags" : ["ruby"]}
1
+ {"title" : "One", "tags" : ["ruby"], "published_on" : "2011-01-01"}
@@ -1 +1 @@
1
- {"title" : "Two", "tags" : ["ruby", "python"]}
1
+ {"title" : "Two", "tags" : ["ruby", "python"], "published_on" : "2011-01-02"}
@@ -1 +1 @@
1
- {"title" : "Three", "tags" : ["java"]}
1
+ {"title" : "Three", "tags" : ["java"], "published_on" : "2011-01-02"}
@@ -1 +1 @@
1
- {"title" : "Four", "tags" : ["erlang"]}
1
+ {"title" : "Four", "tags" : ["erlang"], "published_on" : "2011-01-03"}
@@ -1 +1 @@
1
- {"title" : "Five", "tags" : ["javascript", "java"]}
1
+ {"title" : "Five", "tags" : ["javascript", "java"], "published_on" : "2011-01-04"}
@@ -1,4 +1,5 @@
1
1
  require 'test_helper'
2
+ require 'date'
2
3
 
3
4
  module Slingshot
4
5
 
@@ -40,6 +41,23 @@ module Slingshot
40
41
  assert_equal 5, global_facets.count
41
42
  end
42
43
 
44
+ context "date histogram" do
45
+
46
+ should "return aggregated values for all results" do
47
+ s = Slingshot.search('articles-test') do
48
+ query { all }
49
+ facet 'published_on' do
50
+ date :published_on
51
+ end
52
+ end
53
+
54
+ facets = s.results.facets['published_on']['entries']
55
+ assert_equal 4, facets.size, facets.inspect
56
+ assert_equal 2, facets.entries[1]["count"], facets.inspect
57
+ end
58
+
59
+ end
60
+
43
61
  end
44
62
 
45
63
  end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+
3
+ module Slingshot
4
+
5
+ class HighlightIntegrationTest < Test::Unit::TestCase
6
+ include Test::Integration
7
+
8
+ context "Highlight" do
9
+
10
+ should "add 'highlight' field to the result item" do
11
+ Slingshot::Configuration.logger STDERR, :level => 'debug'
12
+ s = Slingshot.search('articles-test') do
13
+ # query { string '-w' }
14
+ highlight :title
15
+ end
16
+
17
+ doc = s.results.first
18
+
19
+ assert_equal 1, doc.highlight.title.size
20
+ assert doc.highlight.title.to_s.include?('<em>'), "Highlight does not include default highlight tag"
21
+ end
22
+
23
+ end
24
+
25
+ end
26
+ end
@@ -10,6 +10,10 @@ module Slingshot
10
10
  Configuration.instance_variable_set(:@client, nil)
11
11
  end
12
12
 
13
+ teardown do
14
+ Configuration.reset
15
+ end
16
+
13
17
  should "return default URL" do
14
18
  assert_equal 'http://localhost:9200', Configuration.url
15
19
  end
@@ -28,6 +32,16 @@ module Slingshot
28
32
  assert_equal Client::Base, Configuration.client
29
33
  end
30
34
 
35
+ should "return nil as logger by default" do
36
+ assert_nil Configuration.logger
37
+ end
38
+
39
+ should "return set and return logger" do
40
+ Configuration.logger STDERR
41
+ assert_not_nil Configuration.logger
42
+ assert_instance_of Slingshot::Logger, Configuration.logger
43
+ end
44
+
31
45
  should "allow to reset the configuration for specific property" do
32
46
  Configuration.url 'http://example.com'
33
47
  Configuration.client Client::Base
@@ -0,0 +1,114 @@
1
+ require 'test_helper'
2
+ require 'time'
3
+
4
+ module Slingshot
5
+
6
+ class LoggerTest < Test::Unit::TestCase
7
+ include Slingshot
8
+
9
+ context "Logger" do
10
+
11
+ context "initialized with an IO object" do
12
+
13
+ should "take STDOUT" do
14
+ assert_nothing_raised do
15
+ logger = Logger.new STDOUT
16
+ end
17
+ end
18
+
19
+ should "write to STDERR" do
20
+ STDERR.expects(:write).with('BOOM!')
21
+ logger = Logger.new STDERR
22
+ logger.write('BOOM!')
23
+ end
24
+
25
+ end
26
+
27
+ context "initialized with file" do
28
+ teardown { File.delete('myfile.log') }
29
+
30
+ should "create the file" do
31
+ assert_nothing_raised do
32
+ logger = Logger.new 'myfile.log'
33
+ assert File.exists?('myfile.log')
34
+ end
35
+ end
36
+
37
+ should "write to file" do
38
+ File.any_instance.expects(:write).with('BOOM!')
39
+ logger = Logger.new 'myfile.log'
40
+ logger.write('BOOM!')
41
+ end
42
+
43
+ end
44
+
45
+ end
46
+
47
+ context "levels" do
48
+
49
+ should "have default level" do
50
+ logger = Logger.new STDERR
51
+ assert_equal 'info', logger.level
52
+ end
53
+
54
+ should "have set the level" do
55
+ logger = Logger.new STDERR, :level => 'debug'
56
+ assert_equal 'debug', logger.level
57
+ end
58
+
59
+ end
60
+
61
+ context "tracing requests" do
62
+ setup do
63
+ Time.stubs(:now).returns(Time.parse('2011-03-19 11:00'))
64
+ @logger = Logger.new STDERR
65
+ end
66
+
67
+ should "log request in correct format" do
68
+ log = (<<-"log;").gsub(/^ +/, '')
69
+ # [_search] (["articles", "users"]) 2011-03-19 11:00:00:L
70
+ #
71
+ curl -X GET http://...
72
+
73
+ log;
74
+ @logger.expects(:write).with(log)
75
+ @logger.log_request('_search', ["articles", "users"], 'curl -X GET http://...')
76
+ end
77
+
78
+ should "log response in correct format" do
79
+ json = (<<-"json;").gsub(/^\s*/, '')
80
+ {
81
+ "took" : 4,
82
+ "hits" : {
83
+ "total" : 20,
84
+ "max_score" : 1.0,
85
+ "hits" : [ {
86
+ "_index" : "articles",
87
+ "_type" : "article",
88
+ "_id" : "Hmg0B0VSRKm2VAlsasdnqg",
89
+ "_score" : 1.0, "_source" : { "title" : "Article 1", "published_on" : "2011-01-01" }
90
+ }, {
91
+ "_index" : "articles",
92
+ "_type" : "article",
93
+ "_id" : "booSWC8eRly2I06GTUilNA",
94
+ "_score" : 1.0, "_source" : { "title" : "Article 2", "published_on" : "2011-01-12" }
95
+ }
96
+ ]
97
+ }
98
+ }
99
+ json;
100
+ log = (<<-"log;").gsub(/^\s*/, '')
101
+ # [200 OK] (4 msec) 2011-03-19 11:00:00:L
102
+ #
103
+ log;
104
+ # log += json.split.map { |line| "# #{line}" }.join("\n")
105
+ json.each_line { |line| log += "# #{line}" }
106
+ log += "\n\n"
107
+ @logger.expects(:write).with(log)
108
+ @logger.log_response('200 OK', json)
109
+ end
110
+
111
+ end
112
+
113
+ end
114
+ end
@@ -13,26 +13,33 @@ module Slingshot::Search
13
13
  context "generally" do
14
14
 
15
15
  should "encode facets with defaults for current query" do
16
- assert_equal( { :foo => { :terms => {:field=>'bar'} } }.to_json, Facet.new('foo').terms(:bar).to_json )
16
+ assert_equal( { :foo => { :terms => {:field=>'bar',:size=>10} } }.to_json, Facet.new('foo').terms(:bar).to_json )
17
17
  end
18
18
 
19
19
  should "encode facets as global" do
20
- assert_equal( { :foo => { :terms => {:field=>'bar'}, :global => true } }.to_json,
20
+ assert_equal( { :foo => { :terms => {:field=>'bar',:size=>10}, :global => true } }.to_json,
21
21
  Facet.new('foo', :global => true).terms(:bar).to_json )
22
22
  end
23
23
 
24
24
  should "encode facet options" do
25
- assert_equal( { :foo => { :terms => {:field=>'bar'}, :size => 5 } }.to_json,
26
- Facet.new('foo', :size => 5).terms(:bar).to_json )
27
- assert_equal( { :foo => { :terms => {:field=>'bar'}, :size => 5 } }.to_json,
28
- Facet.new('foo').terms(:bar, :size => 5).to_json )
25
+ assert_equal( { :foo => { :terms => {:field=>'bar',:size=>5} } }.to_json,
26
+ Facet.new('foo').terms(:bar, 5).to_json )
29
27
  end
30
28
 
31
29
  should "encode facets when passed as a block" do
32
30
  f = Facet.new('foo') do
33
31
  terms :bar
34
32
  end
35
- assert_equal( { :foo => { :terms => {:field=>'bar'} } }.to_json, f.to_json )
33
+ assert_equal( { :foo => { :terms => {:field=>'bar',:size=>10} } }.to_json, f.to_json )
34
+ end
35
+
36
+ end
37
+
38
+ context "date histogram" do
39
+
40
+ should "encode the JSON" do
41
+ f = Facet.new('date') { date :published_on, 'day' }
42
+ assert_equal({ :date => { :date_histogram => { :field => 'published_on', :interval => 'day' } } }.to_json, f.to_json)
36
43
  end
37
44
 
38
45
  end
@@ -0,0 +1,46 @@
1
+ require 'test_helper'
2
+
3
+ module Slingshot::Search
4
+
5
+ class HighlightTest < Test::Unit::TestCase
6
+
7
+ context "Highlight" do
8
+
9
+ should "be serialized to JSON" do
10
+ assert_respond_to Highlight.new(:body), :to_json
11
+ end
12
+
13
+ should "specify highlight for single field" do
14
+ assert_equal( {:fields => { :body => {} }}.to_json,
15
+ Highlight.new(:body).to_json )
16
+ end
17
+
18
+ should "specify highlight for more fields" do
19
+ assert_equal( {:fields => { :title => {}, :body => {} }}.to_json,
20
+ Highlight.new(:title, :body).to_json )
21
+ end
22
+
23
+ should "specify highlight for more fields with options" do
24
+ assert_equal( {:fields => { :title => {}, :body => { :a => 1, :b => 2 } }}.to_json,
25
+ Highlight.new(:title, :body => { :a => 1, :b => 2 }).to_json )
26
+ end
27
+
28
+ should "specify highlight for more fields with highlight options" do
29
+ # p Highlight.new(:title, :body => {}, :options => { :tag => '<strong>' }).to_hash
30
+ assert_equal( {:fields => { :title => {}, :body => {} }, :pre_tags => ['<strong>'], :post_tags => ['</strong>'] }.to_json,
31
+ Highlight.new(:title, :body => {}, :options => { :tag => '<strong>' }).to_json )
32
+ end
33
+
34
+ context "with custom tags" do
35
+
36
+ should "properly parse tags with class" do
37
+ assert_equal( {:fields => { :title => {} }, :pre_tags => ['<strong class="highlight">'], :post_tags => ['</strong>'] }.to_json,
38
+ Highlight.new(:title, :options => { :tag => '<strong class="highlight">' }).to_json )
39
+ end
40
+
41
+ end
42
+
43
+ end
44
+
45
+ end
46
+ end
@@ -33,6 +33,10 @@ module Slingshot::Search
33
33
  Query.new.string('foo', :default_field => 'title') )
34
34
  end
35
35
 
36
+ should "search for all documents" do
37
+ assert_equal( { :match_all => { } }, Query.new.all )
38
+ end
39
+
36
40
  end
37
41
 
38
42
  end
@@ -5,6 +5,7 @@ module Slingshot
5
5
  class SearchTest < Test::Unit::TestCase
6
6
 
7
7
  context "Search" do
8
+ setup { Configuration.reset :logger }
8
9
 
9
10
  should "be initialized with index/indices" do
10
11
  assert_raise(ArgumentError) { Search::Search.new }
@@ -54,6 +55,17 @@ module Slingshot
54
55
  assert_raise(RestClient::InternalServerError) { s.perform }
55
56
  end
56
57
 
58
+ should "log request, but not response, when logger is set" do
59
+ Configuration.logger STDERR
60
+
61
+ Configuration.client.expects(:post).returns('{"hits":[]}')
62
+ Results::Collection.expects(:new).returns([])
63
+ Configuration.logger.expects(:log_request).returns(true)
64
+ Configuration.logger.expects(:log_response).never
65
+
66
+ Search::Search.new('index').perform
67
+ end
68
+
57
69
  context "sort" do
58
70
 
59
71
  should "allow sorting by multiple fields" do
@@ -71,7 +83,7 @@ module Slingshot
71
83
 
72
84
  context "facets" do
73
85
 
74
- should "allow searching for facets" do
86
+ should "retrieve terms facets" do
75
87
  s = Search::Search.new('index') do
76
88
  facet('foo1') { terms :bar, :global => true }
77
89
  facet('foo2', :global => true) { terms :bar }
@@ -83,6 +95,14 @@ module Slingshot
83
95
  assert_not_nil s.facets['foo3']
84
96
  end
85
97
 
98
+ should "retrieve date histogram facets" do
99
+ s = Search::Search.new('index') do
100
+ facet('date') { date :published_on }
101
+ end
102
+ assert_equal 1, s.facets.keys.size
103
+ assert_not_nil s.facets['date']
104
+ end
105
+
86
106
  end
87
107
 
88
108
  context "filter" do
@@ -99,6 +119,37 @@ module Slingshot
99
119
 
100
120
  end
101
121
 
122
+ context "highlight" do
123
+
124
+ should "allow to specify highlight for single field" do
125
+ s = Search::Search.new('index') do
126
+ highlight :body
127
+ end
128
+
129
+ assert_not_nil s.highlight
130
+ assert_instance_of Slingshot::Search::Highlight, s.highlight
131
+ end
132
+
133
+ should "allow to specify highlight for more fields" do
134
+ s = Search::Search.new('index') do
135
+ highlight :body, :title
136
+ end
137
+
138
+ assert_not_nil s.highlight
139
+ assert_instance_of Slingshot::Search::Highlight, s.highlight
140
+ end
141
+
142
+ should "allow to specify highlight with for more fields with options" do
143
+ s = Search::Search.new('index') do
144
+ highlight :body, :title => { :fragment_size => 150, :number_of_fragments => 3 }
145
+ end
146
+
147
+ assert_not_nil s.highlight
148
+ assert_instance_of Slingshot::Search::Highlight, s.highlight
149
+ end
150
+
151
+ end
152
+
102
153
  context "with from/size" do
103
154
 
104
155
  should "set the values in request" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slingshot-rb
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 6
10
- version: 0.0.6
9
+ - 7
10
+ version: 0.0.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Karel Minarik
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-03-13 00:00:00 +01:00
18
+ date: 2011-04-02 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -152,18 +152,21 @@ files:
152
152
  - README.markdown
153
153
  - Rakefile
154
154
  - examples/dsl.rb
155
+ - examples/slingshot-dsl.rb
155
156
  - lib/slingshot-rb.rb
156
157
  - lib/slingshot.rb
157
158
  - lib/slingshot/client.rb
158
159
  - lib/slingshot/configuration.rb
159
160
  - lib/slingshot/dsl.rb
160
161
  - lib/slingshot/index.rb
162
+ - lib/slingshot/logger.rb
161
163
  - lib/slingshot/results/collection.rb
162
164
  - lib/slingshot/results/item.rb
163
165
  - lib/slingshot/rubyext/hash.rb
164
166
  - lib/slingshot/search.rb
165
167
  - lib/slingshot/search/facet.rb
166
168
  - lib/slingshot/search/filter.rb
169
+ - lib/slingshot/search/highlight.rb
167
170
  - lib/slingshot/search/query.rb
168
171
  - lib/slingshot/search/sort.rb
169
172
  - lib/slingshot/version.rb
@@ -176,6 +179,7 @@ files:
176
179
  - test/fixtures/articles/5.json
177
180
  - test/integration/facets_test.rb
178
181
  - test/integration/filters_test.rb
182
+ - test/integration/highlight_test.rb
179
183
  - test/integration/index_mapping_test.rb
180
184
  - test/integration/index_store_test.rb
181
185
  - test/integration/query_string_test.rb
@@ -186,10 +190,12 @@ files:
186
190
  - test/unit/client_test.rb
187
191
  - test/unit/configuration_test.rb
188
192
  - test/unit/index_test.rb
193
+ - test/unit/logger_test.rb
189
194
  - test/unit/results_collection_test.rb
190
195
  - test/unit/results_item_test.rb
191
196
  - test/unit/search_facet_test.rb
192
197
  - test/unit/search_filter_test.rb
198
+ - test/unit/search_highlight_test.rb
193
199
  - test/unit/search_query_test.rb
194
200
  - test/unit/search_sort_test.rb
195
201
  - test/unit/search_test.rb
@@ -238,6 +244,7 @@ test_files:
238
244
  - test/fixtures/articles/5.json
239
245
  - test/integration/facets_test.rb
240
246
  - test/integration/filters_test.rb
247
+ - test/integration/highlight_test.rb
241
248
  - test/integration/index_mapping_test.rb
242
249
  - test/integration/index_store_test.rb
243
250
  - test/integration/query_string_test.rb
@@ -248,10 +255,12 @@ test_files:
248
255
  - test/unit/client_test.rb
249
256
  - test/unit/configuration_test.rb
250
257
  - test/unit/index_test.rb
258
+ - test/unit/logger_test.rb
251
259
  - test/unit/results_collection_test.rb
252
260
  - test/unit/results_item_test.rb
253
261
  - test/unit/search_facet_test.rb
254
262
  - test/unit/search_filter_test.rb
263
+ - test/unit/search_highlight_test.rb
255
264
  - test/unit/search_query_test.rb
256
265
  - test/unit/search_sort_test.rb
257
266
  - test/unit/search_test.rb