elasticsearch-rails 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +6 -0
- data/README.md +15 -13
- data/elasticsearch-rails.gemspec +1 -0
- data/lib/elasticsearch/rails/tasks/import.rb +6 -4
- data/lib/elasticsearch/rails/version.rb +1 -1
- data/lib/rails/templates/01-basic.rb +45 -27
- data/lib/rails/templates/02-pretty.rb +30 -15
- data/lib/rails/templates/03-expert.rb +240 -825
- data/lib/rails/templates/articles.yml.gz +0 -0
- data/lib/rails/templates/index.html.erb +157 -0
- data/lib/rails/templates/indexer.rb +27 -0
- data/lib/rails/templates/search.css +72 -0
- data/lib/rails/templates/searchable.rb +212 -0
- data/lib/rails/templates/seeds.rb +56 -0
- metadata +25 -2
Binary file
|
@@ -0,0 +1,157 @@
|
|
1
|
+
<%#
  Search results page.

  Reads:
    @articles   - the search response; this template uses #size, #total, #each and
                  #response.response (the raw Elasticsearch hash with 'facets' / 'suggest').
    params[:q]  - the user query
    params[:c]  - selected category facet
    params[:a]  - selected author facet
    params[:w]  - selected "published week" facet (ISO date of the week start)
    params[:s]  - sort field
    params[:comments] - 'y' when comments should be searched as well
%>
<div class="col-md-12">
  <h1 class="text-right"><%= link_to 'Search New York Times articles', root_path %></h1>

  <%= form_tag search_path, method: 'get', role: 'search' do %>
    <div class="input-group">
      <%= text_field_tag :q, params[:q], class: 'form-control', placeholder: 'Search...' %>

      <span class="input-group-btn">
        <button type="submit" class="btn btn-default">
          <span class="glyphicon glyphicon-search"></span>
        </button>
      </span>
    </div>

    <div id="form-options" class="clearfix">
      <div class="btn-group pull-left">
        <label class="checkbox-inline">
          <%= check_box_tag 'comments', 'y', params[:comments] == 'y', onclick: "$(this).closest('form').submit()" %>
          Search in comments?
        </label>
        <%# Carry every active facet and the sort order over when the form is re-submitted %>
        <% params.slice(:a, :c, :w, :s).each do |name, value| %>
          <%= hidden_field_tag name, value %>
        <% end %>
      </div>

      <div class="btn-group pull-right">
        <p style="float: left; margin: 0.1em 0 0 0"><small>Displaying <%= (params[:page] || 1).to_i.ordinalize %> page with <%= @articles.size %> articles
           of <strong>total <%= @articles.total %></strong></small></p>

        <button class="btn btn-default btn-xs dropdown-toggle" type="button" data-toggle="dropdown" style="margin-left: 0.5em">
          <%
            # Label for the sort dropdown: an explicit sort wins, otherwise the label is
            # "published on" for an empty query and "relevancy" when a query is given.
            sort = if params[:s].present?
                     params[:s]
                   elsif params[:q].blank?
                     'published_on'
                   else
                     'relevancy'
                   end
          %>
          sorted by <%= sort.humanize.downcase %> <span class="caret"></span>
        </button>
        <ul class="dropdown-menu" role="menu">
          <li><%= link_to "Sort by published on", search_path(params.merge(s: 'published_on')), class: 'btn-xs' %></li>
          <li><%= link_to "Sort by relevancy", search_path(params.merge(s: nil)), class: 'btn-xs' %></li>
        </ul>
      </div>
    </div>
  <% end %>

  <hr>
</div>

<%# "Did you mean" suggestions, shown only when the search returned nothing %>
<% if @articles.size < 1 && (suggestions = @articles.response.response['suggest']) && suggestions.present? %>
  <div class="col-md-12">
    <p class="alert alert-warning">
      No documents have been found. Maybe you mean
      <%=
        # Only the options for the first entry of each suggester are offered.
        terms = suggestions.map { |_name, entries| entries.first ? entries.first['options'] : [] }
                           .flatten
                           .map { |option| option['text'] }
                           .uniq

        # `link_to` escapes the term, so the joined sentence is safe to mark as HTML.
        # Both connectors are overridden so that two suggestions read "a or b".
        terms.map { |term| link_to term, search_path(params.merge(q: term)) }
             .to_sentence(two_words_connector: ' or ', last_word_connector: ' or ')
             .html_safe
      %>?
    </p>
  </div>
<% end %>

<div id="facets" class="col-md-3">
  <% unless @articles.size < 1 %>

    <div class="categories panel panel-default">
      <p class="panel-heading"><%= link_to 'All Sections →'.html_safe, search_path(params.merge(c: nil))%></p>

      <div class="list-group">
        <% @articles.response.response['facets']['categories']['terms'].each do |c| %>
          <%=
            # `safe_join` escapes the (index-provided) term and keeps the badge markup intact
            link_to search_path(params.merge(c: c['term'])),
                    class: "list-group-item#{' active' if params[:c] == c['term']}" do
              safe_join([c['term'].titleize, content_tag(:small, c['count'], class: 'badge')])
            end
          %>
        <% end %>
      </div>
    </div>

    <div class="authors panel panel-default">
      <p class="panel-heading"><%= link_to 'All Authors →'.html_safe, search_path(params.merge(a: nil))%></p>

      <div class="list-group">
        <% @articles.response.response['facets']['authors']['terms'].each do |a| %>
          <%=
            link_to search_path(params.merge(a: a['term'])),
                    class: "list-group-item#{' active' if params[:a] == a['term']}" do
              safe_join([a['term'].titleize, content_tag(:small, a['count'], class: 'badge')])
            end
          %>
        <% end %>
      </div>
    </div>

    <div class="authors panel panel-default">
      <p class="panel-heading"><%= link_to 'Any Date →'.html_safe, search_path(params.merge(w: nil))%></p>

      <div class="list-group">
        <% @articles.response.response['facets']['published']['entries'].each do |w| %>
          <%
            # `time` is divided by 1000 before `Time.at`, i.e. treated as epoch milliseconds.
            # Convert in UTC so the week boundaries do not depend on the server's time zone.
            week_start = Time.at(w['time'] / 1000).utc
            week_end   = week_start.end_of_week
            week_param = week_start.to_date.iso8601
          %>
          <%=
            link_to search_path(params.merge(w: week_param)),
                    class: "list-group-item#{' active' if params[:w] == week_param}" do
              safe_join(["#{week_start.to_date.to_s(:short)} — #{week_end.to_date.to_s(:short)}",
                         content_tag(:small, w['count'], class: 'badge')])
            end
          %>
        <% end %>
      </div>
    </div>
  <% end %>
</div>

<div class="col-md-9">
  <div id="results">
    <%# NOTE(review): highlight fragments are rendered with `html_safe` so the highlight
        tags survive; confirm the indexed text is trusted or HTML-encoded. %>
    <% @articles.each do |article| %>
      <div class="result">
        <h3 class="title">
          <%= (article.try(:highlight).try(:title) ? article.highlight.title.join.html_safe : article.title) %>
          <small class="category"><%= article.categories.to_sentence %></small>
        </h3>

        <p class="body">
          <% if article.try(:highlight).try(:abstract) %>
            <%= article.highlight.abstract.join.html_safe %>
          <% else %>
            <%= article.try(:highlight).try(:content) ? article.highlight.content.join('…').html_safe : article.abstract %>
          <% end %>
        </p>

        <% if comments = article.try(:highlight) && article.highlight['comments.body'] %>
          <p class="comments">
            Comments: <%= comments.join('…').html_safe %>
          </p>
        <% end %>

        <p class="text-muted">
          <small>Authors: <%= article.authors.map(&:full_name).to_sentence %></small> |
          <small>Published: <%= article.published_on %></small> |
          <small>Score: <%= article._score %></small>
        </p>
      </div>
    <% end %>
  </div>

  <%# Keep the query, every facet and the sort order when moving between pages %>
  <ul class="pager">
    <li class="previous"><%= link_to_previous_page @articles, 'Previous Page', params: params.slice(:q, :c, :a, :w, :s, :comments) %></li>
    <li class="next"><%= link_to_next_page @articles, 'Next Page', params: params.slice(:q, :c, :a, :w, :s, :comments) %></li>
  </ul>

</div>

<div class="footer <%= @articles.size < 1 ? 'col-md-12' : 'col-md-9 col-md-offset-3' %>">
  <p><small>Content provided by <a href="http://nytimes.com"><em>The New York Times</em></a>.</small></p>
</div>
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Indexer class for <http://sidekiq.org>
#
# Performs Elasticsearch index/update/delete operations for a single record
# as a Sidekiq job on the `elasticsearch` queue.
#
# Run me with:
#
#     $ bundle exec sidekiq --queue elasticsearch --verbose
#
class Indexer
  include Sidekiq::Worker
  sidekiq_options queue: 'elasticsearch', retry: false, backtrace: true

  # Pass Sidekiq's logger to the Elasticsearch client only when Sidekiq logs at DEBUG level.
  # `::Logger` is spelled out because this class defines its own `Logger` constant.
  Logger = Sidekiq.logger.level == ::Logger::DEBUG ? Sidekiq.logger : nil
  Client = Elasticsearch::Client.new host: (ENV['ELASTICSEARCH_URL'] || 'http://localhost:9200'), logger: Logger

  # Run a single indexing operation.
  #
  # @param operation [String, Symbol] One of `index`, `update` or `delete`
  # @param klass     [String] Name of the model class
  # @param record_id [Integer, String] ID of the record / document
  # @param options   [Hash] Only logged, not otherwise used
  #
  # @raise [ArgumentError] When the operation is not one of the three above
  #
  def perform(operation, klass, record_id, options={})
    logger.debug [operation, "#{klass}##{record_id} #{options.inspect}"]

    # Match the operation exactly: its value is interpolated into a method name below,
    # so a substring match (e.g. "reindex") must not get through.
    case operation.to_s
    when 'index', 'update'
      record = klass.constantize.find(record_id)
      record.__elasticsearch__.client = Client
      record.__elasticsearch__.__send__ "#{operation}_document"
    when 'delete'
      model = klass.constantize
      begin
        Client.delete index: model.index_name, type: model.document_type, id: record_id
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        # The document is already gone from the index; nothing left to do.
        logger.debug "#{klass}##{record_id} not found in the index"
      end
    else raise ArgumentError, "Unknown operation '#{operation}'"
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
/* Page heading */
h1 {
  color: #a3a3a3 !important;
  font-size: 28px !important;
  letter-spacing: -2px;
  text-transform: uppercase;
}

/* Highlighted search terms (the <em> tags inserted by the search highlighter) */
.label-highlight {
  background: #f6fbfc !important;
  box-shadow: 0 1px 0 rgba(0, 0, 0, 0.15);
  padding: 0.2em 0.4em !important;
}

/* ... inside result titles: underlined instead of filled */
h3 .label-highlight {
  background: transparent !important;
  border-bottom: 1px solid #999;
  border-radius: 0;
  box-shadow: 0 2px 1px rgba(0, 0, 0, 0.15);
  padding: 0.1em 0.4em 0 !important;
}

/* ... inside comments: a different background tint */
.comments .label-highlight {
  background: #fcfdf0 !important;
}

/* Facet counts */
small.badge {
  display: inline-block;
  float: right;
  font-size: 80% !important;
  font-weight: normal !important;
}

/* Options row below the search field */
form #form-options {
  color: #666;
  font-size: 95%;
  margin-top: 1.5em;
  padding: 0 0.25em;
}
form #form-options input {
  margin-top: 0.25em;
}

/* Facet panels */
#facets .panel-heading {
  margin-bottom: 0;
}

/* A single search result */
.result {
  border-bottom: 1px solid #ccc;
  margin: 2em 0 0;
  padding: 0 0 1em;
}
.result:first-child {
  margin-top: 0.25em;
}

.result h3.title {
  font-family: 'Rokkitt', sans-serif;
  margin-top: 0;
}

.result .body {
  font-family: Georgia, serif;
}

.result .category {
  font-family: 'Rokkitt', sans-serif;
}

.result .comments {
  color: #666;
  font-size: 80%;
}
|
@@ -0,0 +1,212 @@
|
|
1
|
+
# Adds Elasticsearch integration to the including model: index settings and mapping,
# callbacks which enqueue `Indexer` jobs on changes, a custom JSON serialization
# and a `search` class method with filters, facets, highlighting and suggestions.
#
module Searchable
  extend ActiveSupport::Concern

  included do
    include Elasticsearch::Model

    # Customize the index name
    #
    index_name([Rails.application.engine_name, Rails.env].join('_'))

    # Set up index configuration and mapping
    #
    settings index: { number_of_shards: 1, number_of_replicas: 0 } do
      mapping do
        indexes :title, type: 'multi_field' do
          indexes :title,     analyzer: 'snowball'
          indexes :tokenized, analyzer: 'simple'
        end

        indexes :content, type: 'multi_field' do
          indexes :content,   analyzer: 'snowball'
          indexes :tokenized, analyzer: 'simple'
        end

        indexes :published_on, type: 'date'

        indexes :authors do
          indexes :full_name, type: 'multi_field' do
            indexes :full_name
            indexes :raw, analyzer: 'keyword'
          end
        end

        indexes :categories, analyzer: 'keyword'

        indexes :comments, type: 'nested' do
          indexes :body, analyzer: 'snowball'
          indexes :stars
          indexes :pick
          indexes :user, analyzer: 'keyword'
          indexes :user_location, type: 'multi_field' do
            indexes :user_location
            indexes :raw, analyzer: 'keyword'
          end
        end
      end
    end

    # Set up callbacks for updating the index on model changes
    #
    after_commit lambda { Indexer.perform_async(:index,  self.class.to_s, self.id) }, on: :create
    after_commit lambda { Indexer.perform_async(:update, self.class.to_s, self.id) }, on: :update
    after_commit lambda { Indexer.perform_async(:delete, self.class.to_s, self.id) }, on: :destroy
    after_touch  lambda { Indexer.perform_async(:update, self.class.to_s, self.id) }

    # Customize the JSON serialization for Elasticsearch
    #
    # @return [Hash] The record as JSON-like Hash, with the authors' full names,
    #                selected comment attributes and category titles included
    #
    def as_indexed_json(options={})
      hash = self.as_json(
        include: { authors:  { methods: [:full_name], only: [:full_name] },
                   comments: { only: [:body, :stars, :pick, :user, :user_location] }
                 })
      hash['categories'] = self.categories.map(&:title)
      hash
    end

    # Search in title and content fields for `query`, include highlights in response
    #
    # @param query [String] The user query
    #
    # @option options [String]  :category       Return only documents in this category
    # @option options [String]  :author         Return only documents by this author (full name)
    # @option options [String]  :published_week Return only documents published in the week
    #                                           starting on this date
    # @option options [Boolean] :comments       Search also in comments (when a query is given)
    # @option options [String]  :sort           Sort by this field, in descending order
    #
    # @return [Elasticsearch::Model::Response::Response]
    #
    def self.search(query, options={})

      # The definition is kept in a local variable (not in an instance variable of
      # the class), so concurrent calls never share or overwrite each other's state.
      #
      search_definition = {
        query: {},

        highlight: {
          pre_tags: ['<em class="label label-highlight">'],
          post_tags: ['</em>'],
          fields: {
            title:    { number_of_fragments: 0 },
            abstract: { number_of_fragments: 0 },
            content:  { fragment_size: 50 }
          }
        },

        filter: {},

        facets: {
          categories: {
            terms: {
              field: 'categories'
            },
            facet_filter: {}
          },
          authors: {
            terms: {
              field: 'authors.full_name.raw'
            },
            facet_filter: {}
          },
          published: {
            date_histogram: {
              field: 'published_on',
              interval: 'week'
            },
            facet_filter: {}
          }
        }
      }

      # Prefill and set the filters (top-level `filter` and `facet_filter` elements)
      #
      # The filter is added to the top-level `filter` and to the `facet_filter`
      # of the facet named by `key`.
      #
      set_filters = lambda do |key, f|
        search_definition[:filter][:and] ||= []
        search_definition[:filter][:and]  |= [f]

        facet_filter = search_definition[:facets][key.to_sym][:facet_filter]
        facet_filter[:and] ||= []
        facet_filter[:and]  |= [f]
      end

      if query.blank?
        search_definition[:query] = { match_all: {} }
        search_definition[:sort]  = { published_on: 'desc' }
      else
        search_definition[:query] = {
          bool: {
            should: [
              { multi_match: {
                  query: query,
                  fields: ['title^10', 'abstract^2', 'content'],
                  operator: 'and'
                }
              }
            ]
          }
        }
      end

      if options[:category]
        f = { term: { categories: options[:category] } }

        set_filters.(:authors, f)
        set_filters.(:published, f)
      end

      if options[:author]
        f = { term: { 'authors.full_name.raw' => options[:author] } }

        set_filters.(:categories, f)
        set_filters.(:published, f)
      end

      if options[:published_week]
        f = {
          range: {
            published_on: {
              gte: options[:published_week],
              # Exclusive upper bound: a document dated exactly one week later
              # belongs to the next weekly bucket, not to this one.
              lt: "#{options[:published_week]}||+1w"
            }
          }
        }

        set_filters.(:categories, f)
        set_filters.(:authors, f)
      end

      if query.present? && options[:comments]
        search_definition[:query][:bool][:should] ||= []
        search_definition[:query][:bool][:should] << {
          nested: {
            path: 'comments',
            query: {
              multi_match: {
                query: query,
                fields: ['body'],
                operator: 'and'
              }
            }
          }
        }
        search_definition[:highlight][:fields].update 'comments.body' => { fragment_size: 50 }
      end

      if options[:sort]
        search_definition[:sort]         = { options[:sort] => 'desc' }
        search_definition[:track_scores] = true
      end

      unless query.blank?
        search_definition[:suggest] = {
          text: query,
          suggest_title: {
            term: {
              field: 'title.tokenized',
              suggest_mode: 'always'
            }
          },
          suggest_body: {
            term: {
              field: 'content.tokenized',
              suggest_mode: 'always'
            }
          }
        }
      end

      __elasticsearch__.search(search_definition)
    end
  end
end
|