slingshot-rb 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -5,3 +5,4 @@ pkg/*
5
5
  rdoc/
6
6
  coverage/
7
7
  scratch/
8
+ examples/*.html
data/Rakefile CHANGED
@@ -50,3 +50,25 @@ rescue LoadError
50
50
  abort "RCov is not available. In order to run rcov, you must: sudo gem install rcov"
51
51
  end
52
52
  end
53
+
54
+ namespace :web do
55
+
56
+ desc "Update the Github website"
57
+ task :update => :generate do
58
+ current_branch = `git branch --no-color`.split("\n").select { |line| line =~ /^\* / }.to_s.gsub(/\* (.*)/, '\1')
59
+ (puts "Unable to determine current branch"; exit(1) ) unless current_branch
60
+ system "git stash save && git checkout web"
61
+ system "cp examples/slingshot-dsl.html index.html"
62
+ system "git add index.html && git co -m 'Updated Slingshot website'"
63
+ system "git push origin web:gh-pages -f"
64
+ system "git checkout #{current_branch} && git stash pop"
65
+ end
66
+
67
+ desc "Generate the Rocco documentation page"
68
+ task :generate do
69
+ system "rocco examples/slingshot-dsl.rb"
70
+ html = File.read('examples/slingshot-dsl.html').gsub!(/slingshot\-dsl\.rb/, 'slingshot.rb')
71
+ File.open('examples/slingshot-dsl.html', 'w') { |f| f.write html }
72
+ system "open examples/slingshot-dsl.html"
73
+ end
74
+ end
@@ -0,0 +1,378 @@
1
+ # **Slingshot** is a rich and comfortable Ruby API and DSL for the
2
+ # [_ElasticSearch_](http://www.elasticsearch.org/) search engine/database.
3
+ #
4
+ # _ElasticSearch_ is a scalable, distributed, highly-available,
5
+ # RESTful database communicating by JSON over HTTP, based on [Lucene](http://lucene.apache.org/),
6
+ # written in Java. It manages to be very simple and very powerful at the same time.
7
+ #
8
+ # By following these instructions you should have the search running
9
+ # on a sane operating system in less than 10 minutes.
10
+
11
+ #### Installation
12
+
13
+ # Install Slingshot with Rubygems.
14
+ #
15
+ # gem install slingshot-rb
16
+ #
17
+ require 'rubygems'
18
+ require 'slingshot'
19
+
20
+ #### Prerequisites
21
+
22
+ # You'll need a working and running _ElasticSearch_ server. Thankfully, that's easy.
23
+ ( puts <<-"INSTALL" ; exit(1) ) unless RestClient.get('http://localhost:9200') rescue false
24
+ [!] You don’t appear to have ElasticSearch installed. Please install and launch it with the following commands.
25
+ curl -k -L -o elasticsearch-0.15.0.tar.gz http://github.com/downloads/elasticsearch/elasticsearch/elasticsearch-0.15.0.tar.gz
26
+ tar -zxvf elasticsearch-0.15.0.tar.gz
27
+ ./elasticsearch-0.15.0/bin/elasticsearch -f
28
+ INSTALL
29
+
30
+ ### Simple Usage
31
+
32
+ #### Storing and indexing documents
33
+
34
+ # Let's initialize an index named “articles”.
35
+ Slingshot.index 'articles' do
36
+ # To make sure it's fresh, let's delete any existing index with the same name.
37
+ delete
38
+ # And then, let's create it.
39
+ create
40
+
41
+ # We want to store and index some articles with title and tags. Simple Hashes are OK.
42
+ store :title => 'One', :tags => ['ruby'], :published_on => '2011-01-01'
43
+ store :title => 'Two', :tags => ['ruby', 'python'], :published_on => '2011-01-02'
44
+ store :title => 'Three', :tags => ['java'], :published_on => '2011-01-02'
45
+ store :title => 'Four', :tags => ['ruby', 'php'], :published_on => '2011-01-03'
46
+
47
+ # We force refreshing the index, so we can query it immediately.
48
+ refresh
49
+ end
50
+
51
+ # We may want to define a specific [mapping](http://www.elasticsearch.org/guide/reference/api/admin-indices-create-index.html)
52
+ # for the index.
53
+
54
+ Slingshot.index 'articles' do
55
+ # To do so, just pass a Hash containing the specified mapping to the `Index#create` method.
56
+ create :mappings => {
57
+ # Specify for which type of documents this mapping should be used (`article` in this case).
58
+ :article => {
59
+ :properties => {
60
+ # Specify the type of the field, whether it should be analyzed, etc.
61
+ :id => { :type => 'string', :index => 'not_analyzed', :include_in_all => false },
62
+ # Set the boost or analyzer settings for the field, et cetera. The _ElasticSearch_ guide
63
+ # has [more information](http://elasticsearch.org/guide/reference/mapping/index.html).
64
+ :title => { :type => 'string', :boost => 2.0, :analyzer => 'snowball' },
65
+ :tags => { :type => 'string', :analyzer => 'keyword' },
66
+ :content => { :type => 'string', :analyzer => 'snowball' }
67
+ }
68
+ }
69
+ }
70
+ end
71
+
72
+
73
+
74
+ #### Searching
75
+
76
+ # With the documents indexed and stored in the _ElasticSearch_ database, we want to search for them.
77
+ #
78
+ # Slingshot exposes the search interface via a simple domain-specific language.
79
+
80
+
81
+ ##### Simple Query String Searches
82
+
83
+ # We can do simple searches, like searching for articles containing “One” in their title.
84
+ s = Slingshot.search('articles') do
85
+ query do
86
+ string "title:One"
87
+ end
88
+ end
89
+
90
+ # The results:
91
+ # * One [tags: ruby]
92
+ s.results.each do |document|
93
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
94
+ end
95
+
96
+ # Of course, we may write the blocks in shorter notation.
97
+
98
+ # Let's search for articles whose titles begin with letter “T”.
99
+ s = Slingshot.search('articles') { query { string "title:T*" } }
100
+
101
+ # The results:
102
+ # * Two [tags: ruby, python]
103
+ # * Three [tags: java]
104
+ s.results.each do |document|
105
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
106
+ end
107
+
108
+ # We can use any valid [Lucene query syntax](http://lucene.apache.org/java/3_0_3/queryparsersyntax.html)
109
+ # for the query string queries.
110
+
111
+ # For debugging, we can display the JSON which is being sent to _ElasticSearch_.
112
+ #
113
+ # {"query":{"query_string":{"query":"title:T*"}}}
114
+ #
115
+ puts "", "Query:", "-"*80
116
+ puts s.to_json
117
+
118
+ # Or better, we may display a complete `curl` command, so we can execute it in terminal
119
+ # to see the raw output, tweak params and debug any problems.
120
+ #
121
+ # curl -X POST "http://localhost:9200/articles/_search?pretty=true" \
122
+ # -d '{"query":{"query_string":{"query":"title:T*"}}}'
123
+ #
124
+ puts "", "Try the query in Curl:", "-"*80
125
+ puts s.to_curl
126
+
127
+
128
+ ##### Other Types of Queries
129
+
130
+ # Of course, we may want to define our queries more expressively, for instance
131
+ # when we're searching for articles with specific _tags_.
132
+
133
+ # Let's suppose we want to search for articles tagged “ruby” _or_ “python”.
134
+ # That's a great excuse to use a [_terms_](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
135
+ # query.
136
+ s = Slingshot.search('articles') do
137
+ query do
138
+ terms :tags, ['ruby', 'python']
139
+ end
140
+ end
141
+
142
+ # The search, as expected, returns three articles, all tagged “ruby” — among other tags:
143
+ #
144
+ # * Two [tags: ruby, python]
145
+ # * One [tags: ruby]
146
+ # * Four [tags: ruby, php]
147
+ s.results.each do |document|
148
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
149
+ end
150
+
151
+ # What if we wanted to search for articles tagged both “ruby” _and_ “python”?
152
+ # That's a great excuse to specify `minimum_match` for the query.
153
+ s = Slingshot.search('articles') do
154
+ query do
155
+ terms :tags, ['ruby', 'python'], :minimum_match => 2
156
+ end
157
+ end
158
+
159
+ # The search, as expected, returns one article, tagged with _both_ “ruby” and “python”:
160
+ #
161
+ # * Two [tags: ruby, python]
162
+ s.results.each do |document|
163
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
164
+ end
165
+
166
+ # _ElasticSearch_ allows us to do many more types of queries.
167
+ # Eventually, _Slingshot_ will support all of them.
168
+ # So far, only these are supported:
169
+ #
170
+ # * [term](http://elasticsearch.org/guide/reference/query-dsl/term-query.html)
171
+ # * [terms](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
172
+
173
+ ##### Faceted Search
174
+
175
+ # _ElasticSearch_ makes it trivial to retrieve complex aggregated data from our index/database,
176
+ # so called [_facets_](http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Faceted-Search-Solr).
177
+
178
+ # Let's say we want to display article counts for every tag in the database.
179
+ # For that, we'll use a _terms_ facet.
180
+
181
+ #
182
+ s = Slingshot.search 'articles' do
183
+ # We will search for articles whose title begins with letter “T”,
184
+ query { string 'title:T*' }
185
+
186
+ # and retrieve their counts “bucketed” by their `tags`.
187
+ facet 'tags' do
188
+ terms :tags
189
+ end
190
+ end
191
+
192
+ # As we see, our query has found two articles, and if you recall our articles from above,
193
+ # _Two_ is tagged with “ruby” and “python”, _Three_ is tagged with “java”. So the counts
194
+ # won't surprise us:
195
+ # Found 2 articles: Three, Two
196
+ # Counts:
197
+ # -------
198
+ # ruby 1
199
+ # python 1
200
+ # java 1
201
+ puts "Found #{s.results.count} articles: #{s.results.map(&:title).join(', ')}"
202
+ puts "Counts based on tags:", "-"*25
203
+ s.results.facets['tags']['terms'].each do |f|
204
+ puts "#{f['term'].ljust(10)} #{f['count']}"
205
+ end
206
+
207
+ # These counts are based on the scope of our current query (called `main` in _ElasticSearch_).
208
+ # What if we wanted to display aggregated counts by `tags` across the whole database?
209
+
210
+ #
211
+ s = Slingshot.search 'articles' do
212
+ query { string 'title:T*' }
213
+
214
+ facet 'global-tags' do
215
+ # That's where the `global` scope for a facet comes in.
216
+ terms :tags, :global => true
217
+ end
218
+
219
+ # As you can see, we can even combine facets scoped
220
+ # to the current query with global facets.
221
+ facet 'current-tags' do
222
+ terms :tags
223
+ end
224
+ end
225
+
226
+ # Aggregated results for the current query are the same as previously:
227
+ # Current query facets:
228
+ # -------------------------
229
+ # ruby 1
230
+ # python 1
231
+ # java 1
232
+ puts "Current query facets:", "-"*25
233
+ s.results.facets['current-tags']['terms'].each do |f|
234
+ puts "#{f['term'].ljust(10)} #{f['count']}"
235
+ end
236
+
237
+ # As we see, aggregated results for the global scope include also
238
+ # tags for articles not matched by the query, such as “java” or “php”:
239
+ # Global facets:
240
+ # -------------------------
241
+ # ruby 3
242
+ # python 1
243
+ # php 1
244
+ # java 1
245
+ puts "Global facets:", "-"*25
246
+ s.results.facets['global-tags']['terms'].each do |f|
247
+ puts "#{f['term'].ljust(10)} #{f['count']}"
248
+ end
249
+
250
+ # The real power of facets lies in their combination with
251
+ # [filters](http://elasticsearch.karmi.cz/guide/reference/api/search/filter.html),
252
+ # though:
253
+
254
+ # > When doing things like facet navigation,
255
+ # > sometimes only the hits are needed to be filtered by the chosen facet,
256
+ # > and all the facets should continue to be calculated based on the original query.
257
+
258
+
259
+ ##### Filtered Search
260
+
261
+ # So, let's make our search a bit more complex. Let's search for articles whose titles begin
262
+ # with letter “T”, again, but filter the results, so only the articles tagged “ruby”
263
+ # are returned.
264
+ s = Slingshot.search 'articles' do
265
+
266
+ # We use the same **query** as before.
267
+ query { string 'title:T*' }
268
+
269
+ # And add a _terms_ **filter** based on tags.
270
+ filter :terms, :tags => ['ruby']
271
+
272
+ # And, of course, our facet definition.
273
+ facet('tags') { terms :tags }
274
+
275
+ end
276
+
277
+ # We see that only the article _Two_ (tagged “ruby” and “python”) was returned,
278
+ # _not_ the article _Three_ (tagged “java”):
279
+ #
280
+ # * Two [tags: ruby, python]
281
+ s.results.each do |document|
282
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
283
+ end
284
+
285
+ # However, count for article _Three_'s tags, “java”, _is_ in fact included in facets:
286
+ #
287
+ # Counts based on tags:
288
+ # -------------------------
289
+ # ruby 1
290
+ # python 1
291
+ # java 1
292
+ puts "Counts based on tags:", "-"*25
293
+ s.results.facets['tags']['terms'].each do |f|
294
+ puts "#{f['term'].ljust(10)} #{f['count']}"
295
+ end
296
+
297
+
298
+ ##### Sorting
299
+
300
+ # By default, the results are sorted according to their relevancy
301
+ # (available as the `_score` property).
302
+
303
+ # But, what if we want to sort the results based on some other criteria,
304
+ # such as published date, price, etc? We can do that.
305
+ s = Slingshot.search 'articles' do
306
+ # We search for articles tagged “ruby”
307
+ query { string 'tags:ruby' }
308
+
309
+ # And sort them by their `title`, in descending order.
310
+ sort { title 'desc' }
311
+ end
312
+
313
+ # The results:
314
+ # * Two
315
+ # * One
316
+ # * Four
317
+ s.results.each do |document|
318
+ puts "* #{ document.title }"
319
+ end
320
+
321
+ # Of course, it's possible to combine more fields in the sorting definition.
322
+
323
+ s = Slingshot.search 'articles' do
324
+ # We will just get all articles for this case.
325
+ query { string '*' }
326
+
327
+ sort do
328
+ # We will sort the results by their `published_on` property in ascending (default) order,
329
+ published_on
330
+ # and by their `title` property, in descending order.
331
+ title 'desc'
332
+ end
333
+ end
334
+
335
+ # The results:
336
+ # * One (Published on: 2011-01-01)
337
+ # * Two (Published on: 2011-01-02)
338
+ # * Three (Published on: 2011-01-02)
339
+ # * Four (Published on: 2011-01-03)
340
+ s.results.each do |document|
341
+ puts "* #{ document.title.ljust(10) } (Published on: #{ document.published_on })"
342
+ end
343
+
344
+ ##### Highlighting
345
+
346
+ # Often, you want to highlight the matched snippets in your text.
347
+ # _ElasticSearch_ provides many features for
348
+ # [highlighting](http://www.elasticsearch.org/guide/reference/api/search/highlighting.html).
349
+ #
350
+ s = Slingshot.search 'articles' do
351
+ # Let's search for documents containing “Two” in their titles.
352
+ query { string 'title:Two' }
353
+
354
+ # And use the `highlight` method.
355
+ highlight :title
356
+ end
357
+
358
+ # The results:
359
+ # Title: Two, highlighted title: <em>Two</em>
360
+ s.results.each do |document|
361
+ puts "Title: #{ document.title }, highlighted title: #{document.highlight.title}"
362
+ end
363
+
364
+ # Slingshot allows you to specify options for the highlighting, such as:
365
+ #
366
+ s = Slingshot.search 'articles' do
367
+ query { string 'title:Two' }
368
+
369
+ # • specifying the fields to highlight
370
+ highlight :title, :body
371
+
372
+ # • specifying their options
373
+ highlight :title, :body => { :number_of_fragments => 0 }
374
+
375
+ # • or specifying highlighting options, such as the wrapper tag
376
+ highlight :title, :body, :options => { :tag => '<strong class="highlight">' }
377
+ end
378
+
data/lib/slingshot.rb CHANGED
@@ -2,6 +2,7 @@ require 'rest_client'
2
2
  require 'yajl/json_gem'
3
3
 
4
4
  require 'slingshot/rubyext/hash'
5
+ require 'slingshot/logger'
5
6
  require 'slingshot/configuration'
6
7
  require 'slingshot/client'
7
8
  require 'slingshot/client'
@@ -10,6 +11,7 @@ require 'slingshot/search/query'
10
11
  require 'slingshot/search/sort'
11
12
  require 'slingshot/search/facet'
12
13
  require 'slingshot/search/filter'
14
+ require 'slingshot/search/highlight'
13
15
  require 'slingshot/results/collection'
14
16
  require 'slingshot/results/item'
15
17
  require 'slingshot/index'
@@ -14,6 +14,11 @@ module Slingshot
14
14
  @wrapper = klass || @wrapper || Results::Item
15
15
  end
16
16
 
17
+ def self.logger(device=nil, options={})
18
+ return @logger = Logger.new(device, options) if device
19
+ @logger || nil
20
+ end
21
+
17
22
  def self.reset(*properties)
18
23
  reset_variables = properties.empty? ? instance_variables : instance_variables & properties.map { |p| "@#{p}" }
19
24
  reset_variables.each { |v| instance_variable_set(v, nil) }
@@ -0,0 +1,61 @@
1
+ module Slingshot
2
+ class Logger
3
+
4
+ def initialize(device, options={})
5
+ @device = if device.respond_to?(:write)
6
+ device
7
+ else
8
+ File.open(device, 'a')
9
+ end
10
+ @device.sync = true
11
+ @options = options
12
+ at_exit { @device.close unless @device.closed? }
13
+ end
14
+
15
+ def level
16
+ @options[:level] || 'info'
17
+ end
18
+
19
+ def write(message)
20
+ @device.write message
21
+ end
22
+
23
+ def log_request(endpoint, params=nil, curl='')
24
+ # [_search] (articles,users) 2001-02-12 18:20:42:32
25
+ #
26
+ # curl -X POST ....
27
+ #
28
+ content = "# [#{endpoint}] "
29
+ content += "(#{params.inspect}) " if params
30
+ content += time
31
+ content += "\n#\n"
32
+ content += curl
33
+ content += "\n\n"
34
+ write content
35
+ end
36
+
37
+ def log_response(status, json)
38
+ # [200 OK] (4 msec) Sat Feb 12 19:20:47 2011
39
+ #
40
+ # {
41
+ # "took" : 4,
42
+ # "hits" : [...]
43
+ # ...
44
+ # }
45
+ #
46
+ took = JSON.parse(json)['took'] rescue nil
47
+ content = "# [#{status}] "
48
+ content += "(#{took} msec) " if took
49
+ content += time
50
+ content += "\n#\n"
51
+ json.each_line { |line| content += "# #{line}" }
52
+ content += "\n\n"
53
+ write content
54
+ end
55
+
56
+ def time
57
+ Time.now.strftime('%Y-%m-%d %H:%M:%S:%L')
58
+ end
59
+
60
+ end
61
+ end
@@ -13,6 +13,7 @@ module Slingshot
13
13
  h
14
14
  else
15
15
  document = h['_source'] ? h['_source'] : h['fields']
16
+ document['highlight'] = h['highlight'] if h['highlight']
16
17
  h.update document if document
17
18
  Configuration.wrapper.new(h)
18
19
  end
@@ -3,7 +3,7 @@ module Slingshot
3
3
 
4
4
  class Search
5
5
 
6
- attr_reader :indices, :url, :results, :response, :query, :facets, :filters
6
+ attr_reader :indices, :url, :results, :response, :json, :query, :facets, :filters
7
7
 
8
8
  def initialize(*indices, &block)
9
9
  @options = indices.pop if indices.last.is_a?(Hash)
@@ -37,6 +37,15 @@ module Slingshot
37
37
  self
38
38
  end
39
39
 
40
+ def highlight(*args)
41
+ unless args.empty?
42
+ @highlight = Highlight.new(*args)
43
+ self
44
+ else
45
+ @highlight
46
+ end
47
+ end
48
+
40
49
  def from(value)
41
50
  @from = value
42
51
  self
@@ -53,17 +62,33 @@ module Slingshot
53
62
  end
54
63
 
55
64
  def perform
56
- @url = "#{Configuration.url}/#{indices.join(',')}/_search"
57
- @response = JSON.parse( Configuration.client.post(@url, self.to_json) )
58
- @results = Results::Collection.new(@response)
65
+ @url = "#{Configuration.url}/#{indices.join(',')}/_search"
66
+ @response = Configuration.client.post(@url, self.to_json)
67
+ @json = Yajl::Parser.parse(@response)
68
+ @results = Results::Collection.new(@json)
59
69
  self
60
- rescue Exception
61
- STDERR.puts "Request failed: \n#{self.to_curl}"
70
+ rescue Exception => e
71
+ STDERR.puts "[REQUEST FAILED]\n#{self.to_curl}\n"
62
72
  raise
73
+ ensure
74
+ if Configuration.logger
75
+ Configuration.logger.log_request '_search', indices, to_curl
76
+ if Configuration.logger.level == 'debug'
77
+ # FIXME: Depends on RestClient implementation
78
+ if @response
79
+ code = @response.code
80
+ body = Yajl::Encoder.encode(@json, :pretty => true)
81
+ else
82
+ code = e.message
83
+ body = e.http_body
84
+ end
85
+ Configuration.logger.log_response code, body
86
+ end
87
+ end
63
88
  end
64
89
 
65
90
  def to_curl
66
- %Q|curl -X POST "http://localhost:9200/#{indices}/_search?pretty=true" -d '#{self.to_json}'|
91
+ %Q|curl -X POST "#{Configuration.url}/#{indices}/_search?pretty=true" -d '#{self.to_json}'|
67
92
  end
68
93
 
69
94
  def to_json
@@ -72,6 +97,7 @@ module Slingshot
72
97
  request.update( { :sort => @sort } ) if @sort
73
98
  request.update( { :facets => @facets } ) if @facets
74
99
  @filters.each { |filter| request.update( { :filter => filter } ) } if @filters
100
+ request.update( { :highlight => @highlight } ) if @highlight
75
101
  request.update( { :size => @size } ) if @size
76
102
  request.update( { :from => @from } ) if @from
77
103
  request.update( { :fields => @fields } ) if @fields
@@ -3,7 +3,7 @@ module Slingshot
3
3
 
4
4
  #--
5
5
  # TODO: Implement all elastic search facets (geo, histogram, range, etc)
6
- # https://github.com/elasticsearch/elasticsearch/wiki/Search-API-Facets
6
+ # http://elasticsearch.org/guide/reference/api/search/facets/
7
7
  #++
8
8
 
9
9
  class Facet
@@ -14,8 +14,13 @@ module Slingshot
14
14
  self.instance_eval(&block) if block_given?
15
15
  end
16
16
 
17
- def terms(field, options={})
18
- @value = { :terms => { :field => field } }.update(options)
17
+ def terms(field, size=10, options={})
18
+ @value = { :terms => { :field => field, :size => size } }.update(options)
19
+ self
20
+ end
21
+
22
+ def date(field, interval='day', options={})
23
+ @value = { :date_histogram => { :field => field, :interval => interval } }.update(options)
19
24
  self
20
25
  end
21
26
 
@@ -0,0 +1,39 @@
1
+ module Slingshot
2
+ module Search
3
+
4
+ # http://www.elasticsearch.org/guide/reference/api/search/highlighting.html
5
+ #
6
+ class Highlight
7
+
8
+ def initialize(*args)
9
+ @options = (args.last.is_a?(Hash) && args.last.delete(:options)) || {}
10
+ extract_highlight_tags
11
+ @fields = args.inject({}) do |result, field|
12
+ field.is_a?(Hash) ? result.update(field) : result[field.to_sym] = {}; result
13
+ end
14
+ end
15
+
16
+ def to_json
17
+ to_hash.to_json
18
+ end
19
+
20
+ def to_hash
21
+ h = { :fields => @fields }
22
+ h.update @options
23
+ return h
24
+ end
25
+
26
+ private
27
+
28
+ def extract_highlight_tags
29
+ if tag = @options.delete(:tag)
30
+ @options.update \
31
+ :pre_tags => [tag],
32
+ :post_tags => [tag.to_s.gsub(/^<([a-z]+).*/, '</\1>')]
33
+ end
34
+ end
35
+
36
+ end
37
+
38
+ end
39
+ end
@@ -23,6 +23,11 @@ module Slingshot
23
23
  @value
24
24
  end
25
25
 
26
+ def all
27
+ @value = { :match_all => {} }
28
+ @value
29
+ end
30
+
26
31
  def to_json
27
32
  @value.to_json
28
33
  end
@@ -1,3 +1,3 @@
1
1
  module Slingshot
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -1 +1 @@
1
- {"title" : "One", "tags" : ["ruby"]}
1
+ {"title" : "One", "tags" : ["ruby"], "published_on" : "2011-01-01"}
@@ -1 +1 @@
1
- {"title" : "Two", "tags" : ["ruby", "python"]}
1
+ {"title" : "Two", "tags" : ["ruby", "python"], "published_on" : "2011-01-02"}
@@ -1 +1 @@
1
- {"title" : "Three", "tags" : ["java"]}
1
+ {"title" : "Three", "tags" : ["java"], "published_on" : "2011-01-02"}
@@ -1 +1 @@
1
- {"title" : "Four", "tags" : ["erlang"]}
1
+ {"title" : "Four", "tags" : ["erlang"], "published_on" : "2011-01-03"}
@@ -1 +1 @@
1
- {"title" : "Five", "tags" : ["javascript", "java"]}
1
+ {"title" : "Five", "tags" : ["javascript", "java"], "published_on" : "2011-01-04"}
@@ -1,4 +1,5 @@
1
1
  require 'test_helper'
2
+ require 'date'
2
3
 
3
4
  module Slingshot
4
5
 
@@ -40,6 +41,23 @@ module Slingshot
40
41
  assert_equal 5, global_facets.count
41
42
  end
42
43
 
44
+ context "date histogram" do
45
+
46
+ should "return aggregated values for all results" do
47
+ s = Slingshot.search('articles-test') do
48
+ query { all }
49
+ facet 'published_on' do
50
+ date :published_on
51
+ end
52
+ end
53
+
54
+ facets = s.results.facets['published_on']['entries']
55
+ assert_equal 4, facets.size, facets.inspect
56
+ assert_equal 2, facets.entries[1]["count"], facets.inspect
57
+ end
58
+
59
+ end
60
+
43
61
  end
44
62
 
45
63
  end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+
3
+ module Slingshot
4
+
5
+ class HighlightIntegrationTest < Test::Unit::TestCase
6
+ include Test::Integration
7
+
8
+ context "Highlight" do
9
+
10
+ should "add 'highlight' field to the result item" do
11
+ Slingshot::Configuration.logger STDERR, :level => 'debug'
12
+ s = Slingshot.search('articles-test') do
13
+ # query { string '-w' }
14
+ highlight :title
15
+ end
16
+
17
+ doc = s.results.first
18
+
19
+ assert_equal 1, doc.highlight.title.size
20
+ assert doc.highlight.title.to_s.include?('<em>'), "Highlight does not include default highlight tag"
21
+ end
22
+
23
+ end
24
+
25
+ end
26
+ end
@@ -10,6 +10,10 @@ module Slingshot
10
10
  Configuration.instance_variable_set(:@client, nil)
11
11
  end
12
12
 
13
+ teardown do
14
+ Configuration.reset
15
+ end
16
+
13
17
  should "return default URL" do
14
18
  assert_equal 'http://localhost:9200', Configuration.url
15
19
  end
@@ -28,6 +32,16 @@ module Slingshot
28
32
  assert_equal Client::Base, Configuration.client
29
33
  end
30
34
 
35
+ should "return nil as logger by default" do
36
+ assert_nil Configuration.logger
37
+ end
38
+
39
+ should "return set and return logger" do
40
+ Configuration.logger STDERR
41
+ assert_not_nil Configuration.logger
42
+ assert_instance_of Slingshot::Logger, Configuration.logger
43
+ end
44
+
31
45
  should "allow to reset the configuration for specific property" do
32
46
  Configuration.url 'http://example.com'
33
47
  Configuration.client Client::Base
@@ -0,0 +1,114 @@
1
+ require 'test_helper'
2
+ require 'time'
3
+
4
+ module Slingshot
5
+
6
+ class LoggerTest < Test::Unit::TestCase
7
+ include Slingshot
8
+
9
+ context "Logger" do
10
+
11
+ context "initialized with an IO object" do
12
+
13
+ should "take STDOUT" do
14
+ assert_nothing_raised do
15
+ logger = Logger.new STDOUT
16
+ end
17
+ end
18
+
19
+ should "write to STDERR" do
20
+ STDERR.expects(:write).with('BOOM!')
21
+ logger = Logger.new STDERR
22
+ logger.write('BOOM!')
23
+ end
24
+
25
+ end
26
+
27
+ context "initialized with file" do
28
+ teardown { File.delete('myfile.log') }
29
+
30
+ should "create the file" do
31
+ assert_nothing_raised do
32
+ logger = Logger.new 'myfile.log'
33
+ assert File.exists?('myfile.log')
34
+ end
35
+ end
36
+
37
+ should "write to file" do
38
+ File.any_instance.expects(:write).with('BOOM!')
39
+ logger = Logger.new 'myfile.log'
40
+ logger.write('BOOM!')
41
+ end
42
+
43
+ end
44
+
45
+ end
46
+
47
+ context "levels" do
48
+
49
+ should "have default level" do
50
+ logger = Logger.new STDERR
51
+ assert_equal 'info', logger.level
52
+ end
53
+
54
+ should "have set the level" do
55
+ logger = Logger.new STDERR, :level => 'debug'
56
+ assert_equal 'debug', logger.level
57
+ end
58
+
59
+ end
60
+
61
+ context "tracing requests" do
62
+ setup do
63
+ Time.stubs(:now).returns(Time.parse('2011-03-19 11:00'))
64
+ @logger = Logger.new STDERR
65
+ end
66
+
67
+ should "log request in correct format" do
68
+ log = (<<-"log;").gsub(/^ +/, '')
69
+ # [_search] (["articles", "users"]) 2011-03-19 11:00:00:L
70
+ #
71
+ curl -X GET http://...
72
+
73
+ log;
74
+ @logger.expects(:write).with(log)
75
+ @logger.log_request('_search', ["articles", "users"], 'curl -X GET http://...')
76
+ end
77
+
78
+ should "log response in correct format" do
79
+ json = (<<-"json;").gsub(/^\s*/, '')
80
+ {
81
+ "took" : 4,
82
+ "hits" : {
83
+ "total" : 20,
84
+ "max_score" : 1.0,
85
+ "hits" : [ {
86
+ "_index" : "articles",
87
+ "_type" : "article",
88
+ "_id" : "Hmg0B0VSRKm2VAlsasdnqg",
89
+ "_score" : 1.0, "_source" : { "title" : "Article 1", "published_on" : "2011-01-01" }
90
+ }, {
91
+ "_index" : "articles",
92
+ "_type" : "article",
93
+ "_id" : "booSWC8eRly2I06GTUilNA",
94
+ "_score" : 1.0, "_source" : { "title" : "Article 2", "published_on" : "2011-01-12" }
95
+ }
96
+ ]
97
+ }
98
+ }
99
+ json;
100
+ log = (<<-"log;").gsub(/^\s*/, '')
101
+ # [200 OK] (4 msec) 2011-03-19 11:00:00:L
102
+ #
103
+ log;
104
+ # log += json.split.map { |line| "# #{line}" }.join("\n")
105
+ json.each_line { |line| log += "# #{line}" }
106
+ log += "\n\n"
107
+ @logger.expects(:write).with(log)
108
+ @logger.log_response('200 OK', json)
109
+ end
110
+
111
+ end
112
+
113
+ end
114
+ end
@@ -13,26 +13,33 @@ module Slingshot::Search
13
13
  context "generally" do
14
14
 
15
15
  should "encode facets with defaults for current query" do
16
- assert_equal( { :foo => { :terms => {:field=>'bar'} } }.to_json, Facet.new('foo').terms(:bar).to_json )
16
+ assert_equal( { :foo => { :terms => {:field=>'bar',:size=>10} } }.to_json, Facet.new('foo').terms(:bar).to_json )
17
17
  end
18
18
 
19
19
  should "encode facets as global" do
20
- assert_equal( { :foo => { :terms => {:field=>'bar'}, :global => true } }.to_json,
20
+ assert_equal( { :foo => { :terms => {:field=>'bar',:size=>10}, :global => true } }.to_json,
21
21
  Facet.new('foo', :global => true).terms(:bar).to_json )
22
22
  end
23
23
 
24
24
  should "encode facet options" do
25
- assert_equal( { :foo => { :terms => {:field=>'bar'}, :size => 5 } }.to_json,
26
- Facet.new('foo', :size => 5).terms(:bar).to_json )
27
- assert_equal( { :foo => { :terms => {:field=>'bar'}, :size => 5 } }.to_json,
28
- Facet.new('foo').terms(:bar, :size => 5).to_json )
25
+ assert_equal( { :foo => { :terms => {:field=>'bar',:size=>5} } }.to_json,
26
+ Facet.new('foo').terms(:bar, 5).to_json )
29
27
  end
30
28
 
31
29
  should "encode facets when passed as a block" do
32
30
  f = Facet.new('foo') do
33
31
  terms :bar
34
32
  end
35
- assert_equal( { :foo => { :terms => {:field=>'bar'} } }.to_json, f.to_json )
33
+ assert_equal( { :foo => { :terms => {:field=>'bar',:size=>10} } }.to_json, f.to_json )
34
+ end
35
+
36
+ end
37
+
38
+ context "date histogram" do
39
+
40
+ should "encode the JSON" do
41
+ f = Facet.new('date') { date :published_on, 'day' }
42
+ assert_equal({ :date => { :date_histogram => { :field => 'published_on', :interval => 'day' } } }.to_json, f.to_json)
36
43
  end
37
44
 
38
45
  end
@@ -0,0 +1,46 @@
1
+ require 'test_helper'
2
+
3
+ module Slingshot::Search
4
+
5
+ class HighlightTest < Test::Unit::TestCase
6
+
7
+ context "Highlight" do
8
+
9
+ should "be serialized to JSON" do
10
+ assert_respond_to Highlight.new(:body), :to_json
11
+ end
12
+
13
+ should "specify highlight for single field" do
14
+ assert_equal( {:fields => { :body => {} }}.to_json,
15
+ Highlight.new(:body).to_json )
16
+ end
17
+
18
+ should "specify highlight for more fields" do
19
+ assert_equal( {:fields => { :title => {}, :body => {} }}.to_json,
20
+ Highlight.new(:title, :body).to_json )
21
+ end
22
+
23
+ should "specify highlight for more fields with options" do
24
+ assert_equal( {:fields => { :title => {}, :body => { :a => 1, :b => 2 } }}.to_json,
25
+ Highlight.new(:title, :body => { :a => 1, :b => 2 }).to_json )
26
+ end
27
+
28
+ should "specify highlight for more fields with highlight options" do
29
+ # p Highlight.new(:title, :body => {}, :options => { :tag => '<strong>' }).to_hash
30
+ assert_equal( {:fields => { :title => {}, :body => {} }, :pre_tags => ['<strong>'], :post_tags => ['</strong>'] }.to_json,
31
+ Highlight.new(:title, :body => {}, :options => { :tag => '<strong>' }).to_json )
32
+ end
33
+
34
+ context "with custom tags" do
35
+
36
+ should "properly parse tags with class" do
37
+ assert_equal( {:fields => { :title => {} }, :pre_tags => ['<strong class="highlight">'], :post_tags => ['</strong>'] }.to_json,
38
+ Highlight.new(:title, :options => { :tag => '<strong class="highlight">' }).to_json )
39
+ end
40
+
41
+ end
42
+
43
+ end
44
+
45
+ end
46
+ end
@@ -33,6 +33,10 @@ module Slingshot::Search
33
33
  Query.new.string('foo', :default_field => 'title') )
34
34
  end
35
35
 
36
+ should "search for all documents" do
37
+ assert_equal( { :match_all => { } }, Query.new.all )
38
+ end
39
+
36
40
  end
37
41
 
38
42
  end
@@ -5,6 +5,7 @@ module Slingshot
5
5
  class SearchTest < Test::Unit::TestCase
6
6
 
7
7
  context "Search" do
8
+ setup { Configuration.reset :logger }
8
9
 
9
10
  should "be initialized with index/indices" do
10
11
  assert_raise(ArgumentError) { Search::Search.new }
@@ -54,6 +55,17 @@ module Slingshot
54
55
  assert_raise(RestClient::InternalServerError) { s.perform }
55
56
  end
56
57
 
58
+ should "log request, but not response, when logger is set" do
59
+ Configuration.logger STDERR
60
+
61
+ Configuration.client.expects(:post).returns('{"hits":[]}')
62
+ Results::Collection.expects(:new).returns([])
63
+ Configuration.logger.expects(:log_request).returns(true)
64
+ Configuration.logger.expects(:log_response).never
65
+
66
+ Search::Search.new('index').perform
67
+ end
68
+
57
69
  context "sort" do
58
70
 
59
71
  should "allow sorting by multiple fields" do
@@ -71,7 +83,7 @@ module Slingshot
71
83
 
72
84
  context "facets" do
73
85
 
74
- should "allow searching for facets" do
86
+ should "retrieve terms facets" do
75
87
  s = Search::Search.new('index') do
76
88
  facet('foo1') { terms :bar, :global => true }
77
89
  facet('foo2', :global => true) { terms :bar }
@@ -83,6 +95,14 @@ module Slingshot
83
95
  assert_not_nil s.facets['foo3']
84
96
  end
85
97
 
98
+ should "retrieve date histogram facets" do
99
+ s = Search::Search.new('index') do
100
+ facet('date') { date :published_on }
101
+ end
102
+ assert_equal 1, s.facets.keys.size
103
+ assert_not_nil s.facets['date']
104
+ end
105
+
86
106
  end
87
107
 
88
108
  context "filter" do
@@ -99,6 +119,37 @@ module Slingshot
99
119
 
100
120
  end
101
121
 
122
+ context "highlight" do
123
+
124
+ should "allow to specify highlight for single field" do
125
+ s = Search::Search.new('index') do
126
+ highlight :body
127
+ end
128
+
129
+ assert_not_nil s.highlight
130
+ assert_instance_of Slingshot::Search::Highlight, s.highlight
131
+ end
132
+
133
+ should "allow to specify highlight for more fields" do
134
+ s = Search::Search.new('index') do
135
+ highlight :body, :title
136
+ end
137
+
138
+ assert_not_nil s.highlight
139
+ assert_instance_of Slingshot::Search::Highlight, s.highlight
140
+ end
141
+
142
+ should "allow to specify highlight with for more fields with options" do
143
+ s = Search::Search.new('index') do
144
+ highlight :body, :title => { :fragment_size => 150, :number_of_fragments => 3 }
145
+ end
146
+
147
+ assert_not_nil s.highlight
148
+ assert_instance_of Slingshot::Search::Highlight, s.highlight
149
+ end
150
+
151
+ end
152
+
102
153
  context "with from/size" do
103
154
 
104
155
  should "set the values in request" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slingshot-rb
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 6
10
- version: 0.0.6
9
+ - 7
10
+ version: 0.0.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Karel Minarik
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-03-13 00:00:00 +01:00
18
+ date: 2011-04-02 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -152,18 +152,21 @@ files:
152
152
  - README.markdown
153
153
  - Rakefile
154
154
  - examples/dsl.rb
155
+ - examples/slingshot-dsl.rb
155
156
  - lib/slingshot-rb.rb
156
157
  - lib/slingshot.rb
157
158
  - lib/slingshot/client.rb
158
159
  - lib/slingshot/configuration.rb
159
160
  - lib/slingshot/dsl.rb
160
161
  - lib/slingshot/index.rb
162
+ - lib/slingshot/logger.rb
161
163
  - lib/slingshot/results/collection.rb
162
164
  - lib/slingshot/results/item.rb
163
165
  - lib/slingshot/rubyext/hash.rb
164
166
  - lib/slingshot/search.rb
165
167
  - lib/slingshot/search/facet.rb
166
168
  - lib/slingshot/search/filter.rb
169
+ - lib/slingshot/search/highlight.rb
167
170
  - lib/slingshot/search/query.rb
168
171
  - lib/slingshot/search/sort.rb
169
172
  - lib/slingshot/version.rb
@@ -176,6 +179,7 @@ files:
176
179
  - test/fixtures/articles/5.json
177
180
  - test/integration/facets_test.rb
178
181
  - test/integration/filters_test.rb
182
+ - test/integration/highlight_test.rb
179
183
  - test/integration/index_mapping_test.rb
180
184
  - test/integration/index_store_test.rb
181
185
  - test/integration/query_string_test.rb
@@ -186,10 +190,12 @@ files:
186
190
  - test/unit/client_test.rb
187
191
  - test/unit/configuration_test.rb
188
192
  - test/unit/index_test.rb
193
+ - test/unit/logger_test.rb
189
194
  - test/unit/results_collection_test.rb
190
195
  - test/unit/results_item_test.rb
191
196
  - test/unit/search_facet_test.rb
192
197
  - test/unit/search_filter_test.rb
198
+ - test/unit/search_highlight_test.rb
193
199
  - test/unit/search_query_test.rb
194
200
  - test/unit/search_sort_test.rb
195
201
  - test/unit/search_test.rb
@@ -238,6 +244,7 @@ test_files:
238
244
  - test/fixtures/articles/5.json
239
245
  - test/integration/facets_test.rb
240
246
  - test/integration/filters_test.rb
247
+ - test/integration/highlight_test.rb
241
248
  - test/integration/index_mapping_test.rb
242
249
  - test/integration/index_store_test.rb
243
250
  - test/integration/query_string_test.rb
@@ -248,10 +255,12 @@ test_files:
248
255
  - test/unit/client_test.rb
249
256
  - test/unit/configuration_test.rb
250
257
  - test/unit/index_test.rb
258
+ - test/unit/logger_test.rb
251
259
  - test/unit/results_collection_test.rb
252
260
  - test/unit/results_item_test.rb
253
261
  - test/unit/search_facet_test.rb
254
262
  - test/unit/search_filter_test.rb
263
+ - test/unit/search_highlight_test.rb
255
264
  - test/unit/search_query_test.rb
256
265
  - test/unit/search_sort_test.rb
257
266
  - test/unit/search_test.rb