elasticsearch-rails 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
<%# Search header: query box, "search in comments" toggle, result summary and sort dropdown %>
<div class="col-md-12">
  <h1 class="text-right"><%= link_to 'Search New York Times articles', root_path %></h1>

  <%= form_tag search_path, method: 'get', role: 'search' do %>
    <div class="input-group">
      <%= text_field_tag :q, params[:q], class: 'form-control', placeholder: 'Search...' %>

      <span class="input-group-btn">
        <button type="submit" class="btn btn-default">
          <span class="glyphicon glyphicon-search"></span>
        </button>
      </span>
    </div>

    <div id="form-options" class="clearfix">
      <div class="btn-group pull-left">
        <label class="checkbox-inline">
          <%# Toggling the checkbox re-submits the whole form %>
          <%= check_box_tag 'comments', 'y', params[:comments] == 'y', onclick: "$(this).closest('form').submit()" %>
          Search in comments?
        </label>
        <%# Carry every active facet filter (author, category, week) and the sort order over
            when the form is re-submitted; `w` was previously dropped here %>
        <% params.slice(:a, :c, :s, :w).each do |name, value| %>
          <%= hidden_field_tag name, value %>
        <% end %>
      </div>

      <div class="btn-group pull-right">
        <p style="float: left; margin: 0.1em 0 0 0"><small>Displaying <%= (params[:page] || 1).to_i.ordinalize %> page with <%= @articles.size %> articles
           of <strong>total <%= @articles.total %></strong></small></p>

        <button class="btn btn-default btn-xs dropdown-toggle" type="button" data-toggle="dropdown" style="margin-left: 0.5em">
          <%
            # An explicit (non-blank) `s` parameter wins; otherwise an empty query is
            # labelled as sorted by publication date and a real query by relevancy.
            sort = if params[:s].present?
                     params[:s]
                   elsif params[:q].blank?
                     'published_on'
                   else
                     'relevancy'
                   end
          %>
          sorted by <%= sort.humanize.downcase %> <span class="caret"></span>
        </button>
        <ul class="dropdown-menu" role="menu">
          <li><%= link_to "Sort by published on", search_path(params.merge(s: 'published_on')), class: 'btn-xs' %></li>
          <li><%= link_to "Sort by relevancy", search_path(params.merge(s: nil)), class: 'btn-xs' %></li>
        </ul>
      </div>
    </div>
  <% end %>

  <hr>
</div>
49
+
50
<%# "Did you mean" box: shown only when nothing was found AND the suggesters returned terms %>
<%
  suggest = @articles.size < 1 ? @articles.response.response['suggest'] : nil

  # Each suggester yields a list of entries; as before, only the first entry's options are
  # used. Missing entries or options are treated as "no suggestion" instead of raising.
  suggested_terms = (suggest || {}).flat_map { |_name, entries| (entries.first || {})['options'] || [] }
  suggested_terms = suggested_terms.map { |option| option['text'] }.uniq
%>
<% if suggested_terms.any? %>
  <div class="col-md-12">
    <p class="alert alert-warning">
      No documents have been found. Maybe you mean
      <%# `link_to` escapes each term, so the joined sentence can be marked html_safe.
          Both connectors are ' or ' so that two suggestions read "a or b", not "a and b". %>
      <%= suggested_terms.map do |term|
            link_to term, search_path(params.merge q: term)
          end.to_sentence(two_words_connector: ' or ', last_word_connector: ' or ').html_safe %>?
    </p>
  </div>
<% end %>
60
+
61
<%# Facet navigation: sections, authors and publication weeks. Hidden when there are no results. %>
<div id="facets" class="col-md-3">
  <% unless @articles.size < 1 %>
    <% facets = @articles.response.response['facets'] %>

    <%# Term facets: [facet name in the response, URL parameter, panel heading] %>
    <% [ ['categories', :c, 'All Sections &rarr;'], ['authors', :a, 'All Authors &rarr;'] ].each do |facet, param, heading| %>
      <div class="<%= facet %> panel panel-default">
        <p class="panel-heading"><%= link_to heading.html_safe, search_path(params.merge(param => nil)) %></p>

        <div class="list-group">
          <% facets[facet]['terms'].each do |entry| %>
            <%=
              link_to search_path(params.merge(param => entry['term'])),
                      class: "list-group-item#{' active' if params[param] == entry['term']}" do
                # The term comes from indexed data: let `safe_join` escape it instead of
                # marking it html_safe.
                safe_join([entry['term'].titleize, content_tag(:small, entry['count'], class: 'badge')])
              end
            %>
          <% end %>
        </div>
      </div>
    <% end %>

    <div class="published panel panel-default">
      <p class="panel-heading"><%= link_to 'Any Date &rarr;'.html_safe, search_path(params.merge(w: nil)) %></p>

      <div class="list-group">
        <% facets['published']['entries'].each do |entry| %>
          <%
            # `time` is divided by 1000 before `Time.at`, i.e. treated as epoch milliseconds.
            # Convert to UTC before taking the date: in a local zone west of UTC the bucket
            # start would otherwise fall on the previous day, shifting both the label and
            # the `w` filter value.
            # NOTE(review): assumes the histogram buckets are computed in UTC (the
            # Elasticsearch default when no time zone is configured) -- confirm.
            week_start = Time.at(entry['time'] / 1000).utc
            week_end   = week_start.end_of_week
            week_param = week_start.to_date.iso8601
          %>
          <%=
            link_to search_path(params.merge(w: week_param)),
                    class: "list-group-item#{' active' if params[:w] == week_param}" do
              safe_join([
                "#{week_start.to_date.to_s(:short)} &mdash; #{week_end.to_date.to_s(:short)}".html_safe,
                content_tag(:small, entry['count'], class: 'badge')
              ])
            end
          %>
        <% end %>
      </div>
    </div>
  <% end %>
</div>
115
+
116
<%# Result list and pager %>
<div class="col-md-9">
  <div id="results">
    <% @articles.each do |article| %>
      <%# Highlight fragments (when present) replace the plain attribute values.
          NOTE(review): fragments are rendered with html_safe, so they must already be
          safe HTML apart from the highlight tags -- confirm the search definition
          encodes them. %>
      <% highlight = article.try(:highlight) %>
      <div class="result">
        <h3 class="title">
          <%= highlight.try(:title) ? highlight.title.join.html_safe : article.title %>
          <small class="category"><%= article.categories.to_sentence %></small>
        </h3>

        <p class="body">
          <% if highlight.try(:abstract) %>
            <%= highlight.abstract.join.html_safe %>
          <% elsif highlight.try(:content) %>
            <%= highlight.content.join('&hellip;').html_safe %>
          <% else %>
            <%= article.abstract %>
          <% end %>
        </p>

        <% if comments = highlight && highlight['comments.body'] %>
          <p class="comments">
            Comments: <%= comments.join('&hellip;').html_safe %>
          </p>
        <% end %>

        <p class="text-muted">
          <small>Authors: <%= article.authors.map(&:full_name).to_sentence %></small> |
          <small>Published: <%= article.published_on %></small> |
          <small>Score: <%= article._score %></small>
        </p>
      </div>
    <% end %>
  </div>

  <%# Keep the query, every facet filter (category, author, week), the sort order and the
      comments toggle when paging; `w` and `s` were previously dropped from these links %>
  <% pager_params = params.slice(:q, :c, :a, :w, :s, :comments) %>
  <ul class="pager">
    <li class="previous"><%= link_to_previous_page @articles, 'Previous Page', params: pager_params %></li>
    <li class="next"><%= link_to_next_page @articles, 'Next Page', params: pager_params %></li>
  </ul>

</div>
154
+
155
<%# Attribution footer: full width when there are no results, otherwise nine columns offset by three %>
<% footer_columns = @articles.size < 1 ? 'col-md-12' : 'col-md-9 col-md-offset-3' %>
<div class="footer <%= footer_columns %>">
  <p><small>Content provided by <a href="http://nytimes.com"><em>The New York Times</em></a>.</small></p>
</div>
@@ -0,0 +1,27 @@
1
# Indexer class for <http://sidekiq.org>
#
# Performs the Elasticsearch index/update/delete operation for a single record,
# as enqueued with `Indexer.perform_async(operation, class_name, record_id)`.
#
# Run me with:
#
#     $ bundle exec sidekiq --queue elasticsearch --verbose
#
class Indexer
  include Sidekiq::Worker
  sidekiq_options queue: 'elasticsearch', retry: false, backtrace: true

  # Hand the Sidekiq logger to the Elasticsearch client only when Sidekiq logs at DEBUG level.
  # `::Logger` is spelled out because this assignment defines `Indexer::Logger`, which shadows
  # the standard library class everywhere else inside this class.
  Logger = Sidekiq.logger.level == ::Logger::DEBUG ? Sidekiq.logger : nil
  Client = Elasticsearch::Client.new host: ENV.fetch('ELASTICSEARCH_URL', 'http://localhost:9200'), logger: Logger

  # @param operation [String, Symbol] One of `index`, `update` or `delete`
  # @param klass     [String] Name of the model class
  # @param record_id [Object] Id of the record (also used as the document id)
  # @param options   [Hash]   Only logged, not otherwise used
  #
  # @raise [ArgumentError] for any other operation
  #
  def perform(operation, klass, record_id, options={})
    logger.debug [operation, "#{klass}##{record_id} #{options.inspect}"]

    # Match the operation exactly: the name is interpolated into a method name below, so a
    # loose pattern match (e.g. "reindex" matching /index/) must not reach `__send__`.
    case operation.to_s
    when 'index', 'update'
      record = klass.constantize.find(record_id)
      record.__elasticsearch__.client = Client
      record.__elasticsearch__.__send__ "#{operation}_document"
    when 'delete'
      # The record is not loaded here; the document is removed by id
      model = klass.constantize
      Client.delete index: model.index_name, type: model.document_type, id: record_id
    else
      raise ArgumentError, "Unknown operation '#{operation}'"
    end
  end
end
@@ -0,0 +1,72 @@
1
/* Page heading */
h1 {
  font-size: 28px !important;
  color: #a3a3a3 !important;
  text-transform: uppercase;
  letter-spacing: -2px;
}

/* Highlighted search terms (the `label-highlight` class) */
.label-highlight {
  background: #f6fbfc !important;
  box-shadow: 0 1px 0 rgba(0, 0, 0, 0.15);
  padding: 0.2em 0.4em !important;
}

/* ... inside result titles: underlined instead of filled */
h3 .label-highlight {
  background: transparent !important;
  padding: 0.1em 0.4em 0 !important;
  border-bottom: 1px solid #999;
  box-shadow: 0 2px 1px rgba(0, 0, 0, 0.15);
  border-radius: 0;
}

/* ... inside comment excerpts */
.comments .label-highlight {
  background: #fcfdf0 !important;
}

/* Facet counts */
small.badge {
  font-size: 80% !important;
  font-weight: normal !important;
  display: inline-block;
  float: right;
}

/* Options row below the search box */
form #form-options {
  color: #666;
  font-size: 95%;
  margin-top: 1.5em;
  padding: 0 0.25em;
}

form #form-options input {
  margin-top: 0.25em;
}

/* Facet panels */
#facets .panel-heading {
  margin-bottom: 0;
}

/* Search results */
.result {
  border-bottom: 1px solid #ccc;
  margin: 2em 0 0;
  padding: 0 0 1em;
}

.result:first-child {
  margin-top: 0.25em;
}

.result h3.title,
.result .category {
  font-family: 'Rokkitt', sans-serif;
}

.result h3.title {
  margin-top: 0;
}

.result .body {
  font-family: Georgia, serif;
}

.result .comments {
  color: #666;
  font-size: 80%;
}
@@ -0,0 +1,212 @@
1
# Adds Elasticsearch indexing and searching to the including model.
#
# Including this concern mixes in `Elasticsearch::Model`, declares the index name, settings
# and mapping, enqueues `Indexer` jobs from the model callbacks, customizes the indexed JSON
# and defines a `search` class method.
#
module Searchable
  extend ActiveSupport::Concern

  included do
    include Elasticsearch::Model

    # Customize the index name
    #
    index_name [Rails.application.engine_name, Rails.env].join('_')

    # Set up index configuration and mapping
    #
    settings index: { number_of_shards: 1, number_of_replicas: 0 } do
      mapping do
        indexes :title, type: 'multi_field' do
          indexes :title,     analyzer: 'snowball'
          indexes :tokenized, analyzer: 'simple'
        end

        indexes :content, type: 'multi_field' do
          indexes :content,   analyzer: 'snowball'
          indexes :tokenized, analyzer: 'simple'
        end

        indexes :published_on, type: 'date'

        indexes :authors do
          indexes :full_name, type: 'multi_field' do
            indexes :full_name
            indexes :raw, analyzer: 'keyword'
          end
        end

        indexes :categories, analyzer: 'keyword'

        indexes :comments, type: 'nested' do
          indexes :body, analyzer: 'snowball'
          indexes :stars
          indexes :pick
          indexes :user, analyzer: 'keyword'
          indexes :user_location, type: 'multi_field' do
            indexes :user_location
            indexes :raw, analyzer: 'keyword'
          end
        end
      end
    end

    # Set up callbacks for updating the index on model changes
    # (the work itself is done by the `Indexer` worker)
    #
    after_commit lambda { Indexer.perform_async(:index,  self.class.to_s, self.id) }, on: :create
    after_commit lambda { Indexer.perform_async(:update, self.class.to_s, self.id) }, on: :update
    after_commit lambda { Indexer.perform_async(:delete, self.class.to_s, self.id) }, on: :destroy
    after_touch  lambda { Indexer.perform_async(:update, self.class.to_s, self.id) }

    # Customize the JSON serialization for Elasticsearch
    #
    # Embeds the authors (full name only) and the comments (selected attributes),
    # and stores the categories as an array of their titles.
    #
    def as_indexed_json(options={})
      hash = self.as_json(
        include: { authors:  { methods: [:full_name], only: [:full_name] },
                   comments: { only: [:body, :stars, :pick, :user, :user_location] }
                 })
      hash['categories'] = self.categories.map(&:title)
      hash
    end

    # Search in title, abstract and content fields for `query`, include highlights,
    # facets and (for non-blank queries) term suggestions in the response
    #
    # @param query   [String] The user query; when blank, all documents are matched and
    #                         sorted by `published_on`, newest first
    # @param options [Hash]   Optional refinements
    # @option options [String]  :category       Only documents in this category
    # @option options [String]  :author         Only documents by this author (full name)
    # @option options [String]  :published_week Only documents published in the week starting on this date
    # @option options [Boolean] :comments       Also search (and highlight) the comment bodies
    # @option options [String]  :sort           Field to sort by, descending
    #
    # @return [Elasticsearch::Model::Response::Response]
    #
    def self.search(query, options={})

      # The definition is built in a local variable: this is a class method, so an instance
      # variable here would be state on the model class itself, shared between all calls.
      #
      search_definition = {
        query: {},

        highlight: {
          # Fragments are rendered as raw HTML by the view; have Elasticsearch HTML-escape
          # the document text so only the highlight tags below are markup
          encoder: 'html',
          pre_tags: ['<em class="label label-highlight">'],
          post_tags: ['</em>'],
          fields: {
            title:    { number_of_fragments: 0 },
            abstract: { number_of_fragments: 0 },
            content:  { fragment_size: 50 }
          }
        },

        filter: {},

        facets: {
          categories: {
            terms: {
              field: 'categories'
            },
            facet_filter: {}
          },
          authors: {
            terms: {
              field: 'authors.full_name.raw'
            },
            facet_filter: {}
          },
          published: {
            date_histogram: {
              field: 'published_on',
              interval: 'week'
            },
            facet_filter: {}
          }
        }
      }

      # Add a filter to the top-level `filter` and to the `facet_filter` of the given facet
      #
      add_filter = lambda do |facet, filter|
        search_definition[:filter][:and] ||= []
        search_definition[:filter][:and]  |= [filter]

        search_definition[:facets][facet.to_sym][:facet_filter][:and] ||= []
        search_definition[:facets][facet.to_sym][:facet_filter][:and]  |= [filter]
      end

      if query.blank?
        search_definition[:query] = { match_all: {} }
        search_definition[:sort]  = { published_on: 'desc' }
      else
        search_definition[:query] = {
          bool: {
            should: [
              { multi_match: {
                  query: query,
                  fields: ['title^10', 'abstract^2', 'content'],
                  operator: 'and'
                }
              }
            ]
          }
        }
      end

      # Each selection is applied to the *other* two facets, not to its own
      #
      if options[:category]
        category_filter = { term: { categories: options[:category] } }

        add_filter.(:authors,   category_filter)
        add_filter.(:published, category_filter)
      end

      if options[:author]
        author_filter = { term: { 'authors.full_name.raw' => options[:author] } }

        add_filter.(:categories, author_filter)
        add_filter.(:published,  author_filter)
      end

      if options[:published_week]
        week_filter = {
          range: {
            published_on: {
              gte: options[:published_week],
              # Exclusive upper bound: the start of the following week belongs
              # to the next week, not to this one
              lt: "#{options[:published_week]}||+1w"
            }
          }
        }

        add_filter.(:categories, week_filter)
        add_filter.(:authors,    week_filter)
      end

      if query.present? && options[:comments]
        search_definition[:query][:bool][:should] << {
          nested: {
            path: 'comments',
            query: {
              multi_match: {
                query: query,
                fields: ['body'],
                operator: 'and'
              }
            }
          }
        }
        search_definition[:highlight][:fields].update 'comments.body' => { fragment_size: 50 }
      end

      # A blank sort option is ignored instead of producing a sort on an empty field name
      #
      if options[:sort].present?
        search_definition[:sort] = { options[:sort] => 'desc' }
        search_definition[:track_scores] = true
      end

      if query.present?
        search_definition[:suggest] = {
          text: query,
          suggest_title: {
            term: {
              field: 'title.tokenized',
              suggest_mode: 'always'
            }
          },
          suggest_body: {
            term: {
              field: 'content.tokenized',
              suggest_mode: 'always'
            }
          }
        }
      end

      __elasticsearch__.search(search_definition)
    end
  end
end