es-query-builder 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +34 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +61 -0
- data/Rakefile +5 -0
- data/es-query-builder.gemspec +22 -0
- data/lib/es-query-builder.rb +93 -0
- data/lib/es-query-builder/parser.rb +224 -0
- data/lib/es-query-builder/token.rb +25 -0
- data/lib/es-query-builder/tokenizer.rb +69 -0
- data/lib/es-query-builder/version.rb +3 -0
- data/spec/es_query_builder_spec.rb +496 -0
- data/spec/spec_helper.rb +6 -0
- metadata +102 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 01a7f2c3df52977455ef8cc73bb06caffae1b5d8
|
4
|
+
data.tar.gz: f9b6ddc51b5b9f64fceb830d8a9a12b5e557d362
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 76e05d352511a51d07b53363273dc10da73ed421d09b8c92acf3bf7274a151f1756fd5ea68037a84efebc39f872c41590304be8df8aae1444bbed0a06aadf1b0
|
7
|
+
data.tar.gz: 4d19002288f950ad91db2bc5a819cd9fff51f74349a4aafa0bd9536af50cea9540c10defc4ed478e1ffb440a48517c143dad5403360b877766ba6e184af8b0c7
|
data/.gitignore
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/test/tmp/
|
9
|
+
/test/version_tmp/
|
10
|
+
/tmp/
|
11
|
+
|
12
|
+
## Specific to RubyMotion:
|
13
|
+
.dat*
|
14
|
+
.repl_history
|
15
|
+
build/
|
16
|
+
|
17
|
+
## Documentation cache and generated files:
|
18
|
+
/.yardoc/
|
19
|
+
/_yardoc/
|
20
|
+
/doc/
|
21
|
+
/rdoc/
|
22
|
+
|
23
|
+
## Environment normalisation:
|
24
|
+
/.bundle/
|
25
|
+
/lib/bundler/man/
|
26
|
+
|
27
|
+
# for a library or gem, you might want to ignore these files since the code is
|
28
|
+
# intended to run in multiple environments; otherwise, check them in:
|
29
|
+
Gemfile.lock
|
30
|
+
.ruby-version
|
31
|
+
.ruby-gemset
|
32
|
+
|
33
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
34
|
+
.rvmrc
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Yuku Takahashi
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/increments/es-query-builder.svg?branch=master)](https://travis-ci.org/increments/es-query-builder)
|
2
|
+
|
3
|
+
NAME
|
4
|
+
====
|
5
|
+
|
6
|
+
`EsQueryBuilder` - A query builder for Elasticsearch in Ruby.
|
7
|
+
|
8
|
+
SYNOPSIS
|
9
|
+
========
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
builder = EsQueryBuilder.new(
|
13
|
+
# Fields that may be searched with a match query.
|
14
|
+
query_fields: ['field1'],
|
15
|
+
# Fields for filtering. Queries for these fields do not affect search score.
|
16
|
+
filter_fields: ['field2']
|
17
|
+
)
|
18
|
+
|
19
|
+
query = builder.build(query_string_given_by_user)
|
20
|
+
|
21
|
+
body =
|
22
|
+
if query.nil?
|
23
|
+
# Empty query
|
24
|
+
{ size: 0 }
|
25
|
+
else
|
26
|
+
# Add other conditions, such as sort, highlight, fields and so on.
|
27
|
+
{
|
28
|
+
query: query,
|
29
|
+
sort: { ... }
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
client = Elasticsearch::Client.new(host: 'http://server:9200')
|
34
|
+
client.search({
|
35
|
+
index: 'index_name',
|
36
|
+
type: 'type_name',
|
37
|
+
body: body
|
38
|
+
})
|
39
|
+
# => #<Hash>
|
40
|
+
```
|
41
|
+
|
42
|
+
DESCRIPTION
|
43
|
+
===========
|
44
|
+
|
45
|
+
`EsQueryBuilder` converts a query string into a corresponding hash object for [elasticsearch-ruby](https://github.com/elasticsearch/elasticsearch-ruby).
|
46
|
+
|
47
|
+
Elasticsearch supports the [query_string query](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html) DSL, which is very useful internally but too powerful to expose as a public interface. Allowing anonymous users to use the DSL may cause not only performance problems but also security risks if your index includes secret types.
|
48
|
+
|
49
|
+
This gem accepts a string written in a query_string-query-DSL-like syntax and converts it into a query object built from other query DSLs. At the same time, it sanitizes the fields used in the query.
|
50
|
+
|
51
|
+
INSTALLATION
|
52
|
+
============
|
53
|
+
|
54
|
+
```bash
|
55
|
+
gem install es-query-builder
|
56
|
+
```
|
57
|
+
|
58
|
+
LICENSE
|
59
|
+
=======
|
60
|
+
|
61
|
+
This software is licensed under [MIT license](https://github.com/increments/es-query-builder/tree/master/LICENSE.txt).
|
data/Rakefile
ADDED
data/es-query-builder.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require "es-query-builder/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "es-query-builder"
|
7
|
+
spec.version = EsQueryBuilder::VERSION
|
8
|
+
spec.authors = ["Yuku Takahashi"]
|
9
|
+
spec.email = ["yuku@qiita.com"]
|
10
|
+
spec.summary = "Build a query hash by a simple query string"
|
11
|
+
spec.homepage = "https://github.com/increments/es-query-builder"
|
12
|
+
spec.license = "MIT"
|
13
|
+
|
14
|
+
spec.files = `git ls-files -z`.split("\x0")
|
15
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
16
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
17
|
+
spec.require_paths = ["lib"]
|
18
|
+
|
19
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
20
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
21
|
+
spec.add_development_dependency "rspec"
|
22
|
+
end
|
data/lib/es-query-builder.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# Public: The class has a responsibility for converting a query string into
# a corresponding query hash object for Elasticsearch.
#
# Examples
#
#   builder = EsQueryBuilder.new(
#     query_fields: ['query'],
#     filter_fields: ['filter']
#   )
#   # => #<EsQueryBuilder>
#
#   builder.build('term')
#   # => { match: { '_all' => 'term' } }
#
#   builder.build('query:term')
#   # => { match: { 'query' => 'term' } }
#
#   builder.build('filter:term')
#   # => {
#   #      filtered: {
#   #        query: { match_all: {} },
#   #        filter: { term: { 'filter' => 'term' } }
#   #      }
#   #    }
#
#   builder.build('query\:term')
#   # => { match: { '_all' => 'query\:term' } }
#
#   builder.build('unknown:term')
#   # => { match: { '_all' => 'term' } }
class EsQueryBuilder
  require 'es-query-builder/token'
  require 'es-query-builder/tokenizer'
  require 'es-query-builder/parser'
  require 'es-query-builder/version'

  # Public: Construct the query builder object.
  #
  # query_fields     - An Array of Strings specifying the fields allowed for
  #                    querying (default: []).
  # filter_fields    - An Array of Strings specifying the fields allowed for
  #                    filtering (default: []).
  # all_query_fields - A String or an Array of Strings for searching usual
  #                    query terms (default: '_all').
  # hierarchy_fields - An Array of Strings which treats the trailing slash
  #                    character as a hierarchy (default: []).
  #
  # Returns nothing.
  def initialize(query_fields: [], filter_fields: [],
                 all_query_fields: '_all', hierarchy_fields: [])
    @query_fields     = query_fields
    @filter_fields    = filter_fields
    @all_query_fields = all_query_fields
    @hierarchy_fields = hierarchy_fields
  end

  # Public: Convert the given query string into a query object.
  #
  # query_string - A query String for searching. nil is treated like an
  #                empty string.
  #
  # Examples
  #
  #   build('hello world')
  #   # => {
  #   #      bool: {
  #   #        must: [
  #   #          { match: { '_all' => 'hello' } },
  #   #          { match: { '_all' => 'world' } }
  #   #        ]
  #   #      }
  #   #    }
  #
  # Returns a Hash for Elasticsearch client or nil.
  def build(query_string)
    # A missing request parameter typically arrives as nil; coerce it so the
    # tokenizer receives a String instead of raising NoMethodError.
    parser.parse(tokenizer.tokenize(query_string.to_s))
  end

  private

  # Internal: Tokenizer for the builder (memoized).
  #
  # Returns a Tokenizer.
  def tokenizer
    @tokenizer ||= Tokenizer.new(@query_fields, @filter_fields)
  end

  # Internal: Parser for the builder (memoized).
  #
  # Returns a Parser.
  def parser
    @parser ||= Parser.new(@all_query_fields, @hierarchy_fields)
  end
end
|
data/lib/es-query-builder/parser.rb
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
class EsQueryBuilder
  # Public: The class which has a responsibility for creating a query.
  #
  # Note that the term "query" has two different meanings in the terminology of
  # Elasticsearch. One represents how to retrieve documents from Elasticsearch
  # and it consists of query and filter, so that is to say the other is a
  # part of the previous one. In this file, "query" and "query hash" represent
  # the former and the latter respectively:
  #
  #   "query" = "query hash" + "filter hash"
  #
  class Parser
    # Public: Construct the parser object.
    #
    # all_query_fields - A String or an Array of Strings used for terms that
    #                    carry no (allowed) field (default: '_all').
    # hierarchy_fields - An Array of Strings which treats the trailing slash
    #                    character as a hierarchy (default: []).
    #
    # Returns nothing.
    def initialize(all_query_fields = '_all', hierarchy_fields = [])
      @all_query_fields = all_query_fields
      @hierarchy_fields = hierarchy_fields
    end

    # Public: Parse the given tokens and build a query hash.
    #
    # tokens - An Array of Tokens.
    #
    # Returns a Hash for Elasticsearch client or nil.
    def parse(tokens)
      connect_queries(build_queries(tokens))
    end

    private

    # Internal: Convert the given tokens into sequence of queries.
    #
    # tokens - An Array of Tokens.
    #
    # Returns an Array of Hashes. Each hash represents a query.
    def build_queries(tokens)
      split_by_or_token(tokens).each_with_object([]) do |or_less_tokens, queries|
        query_tokens = or_less_tokens.select(&:query?)
        filter_hash  = build_filter_hash(or_less_tokens.select(&:filter?))
        # An expression made only of blank filter terms (e.g. 'tag:""')
        # carries no condition at all, so it is dropped like an empty query.
        next if query_tokens.empty? && filter_hash.empty?
        queries << create_query(build_query_hash(query_tokens), filter_hash)
      end
    end

    # Internal: Merge sequence of queries into a single query.
    #
    # queries - An Array of Hashes. Each hash represents a query.
    #
    # Returns a Hash or nil.
    def connect_queries(queries)
      case queries.size
      when 0
        nil
      when 1
        queries.first
      else
        {
          bool: {
            should: queries
          }
        }
      end
    end

    # Internal: Divide the given tokens array into sub arrays by 'or' token.
    #
    # tokens - An Array of Tokens.
    #
    # Examples
    #
    #   split_by_or_token([<Query>, <OR>, <Query>, <Filter>])
    #   #=> [[<Query>], [<Query>, <Filter>]]
    #
    # Returns an Array of Arrays of Tokens.
    def split_by_or_token(tokens)
      expressions = [[]]
      tokens.each do |token|
        if token.or?
          expressions << []
        else
          expressions.last << token
        end
      end
      # Leading, trailing or repeated 'or' tokens leave empty groups behind.
      expressions.reject(&:empty?)
    end

    # Internal: Connect given query hash and filter hash objects.
    #
    # query_hash  - A Hash represents a query hash.
    # filter_hash - A Hash represents a filter hash.
    #
    # Returns a Hash represents a query.
    def create_query(query_hash, filter_hash)
      return query_hash if filter_hash.empty?
      {
        filtered: {
          query: query_hash,
          filter: filter_hash
        }
      }
    end

    # Internal: Build a query hash by query tokens
    #
    # query_tokens - An Array of query Tokens.
    #
    # Returns a Hash represents a query hash.
    def build_query_hash(query_tokens)
      return { match_all: {} } if query_tokens.empty?
      must, must_not = create_bool_queries(query_tokens)
      if must.size == 1 && must_not.empty?
        must.first
      else
        bool = {}
        bool[:must]     = must     unless must.empty?
        bool[:must_not] = must_not unless must_not.empty?
        { bool: bool }
      end
    end

    # Internal: Build a filter parameter hash by filter tokens
    #
    # filter_tokens - An Array of filter Tokens.
    #
    # Returns a Hash represents a filter hash. The hash is empty when the
    # tokens yield no filter condition.
    def build_filter_hash(filter_tokens)
      return {} if filter_tokens.empty?
      must, should, must_not = create_bool_filters(filter_tokens)
      # Tokens whose terms are blank produce no clause; never emit an empty
      # bool filter for them.
      return {} if must.empty? && should.empty? && must_not.empty?
      if must.size == 1 && should.empty? && must_not.empty?
        # Term filter is cached by default.
        must.first
      else
        bool = {}
        bool[:must]     = must     unless must.empty?
        bool[:should]   = should   unless should.empty?
        bool[:must_not] = must_not unless must_not.empty?
        # Bool filter is not cached by default.
        { bool: bool.merge(_cache: true) }
      end
    end

    # Internal: Create boolean query based with the given query tokens.
    #
    # query_tokens - An Array of query Tokens.
    #
    # Returns an Array consists of must and must_not query arrays.
    def create_bool_queries(query_tokens)
      must, must_not = [], []
      query_tokens.each do |token|
        # When the field is not given or invalid one, search by all fields.
        field   = token.field || @all_query_fields
        queries = token.minus? ? must_not : must
        if field.is_a?(String)
          queries << {
            match: {
              field => token.term
            }
          }
        else
          # A list of fields needs a multi_match query.
          queries << {
            multi_match: {
              fields: field,
              query: token.term
            }
          }
        end
      end
      [must, must_not]
    end

    # Internal: Create boolean filter based on the filter matches.
    # If a field query in hierarchy fields ends with '/', it matches to all
    # descendant terms. Every term is downcased before it is used.
    #
    # filter_tokens - An Array of filter Tokens.
    #
    # Examples
    #
    #   # When 'tag:"foo bar"'
    #   create_bool_filters([...])
    #   # => [[{ term: { 'tag' => 'foo' } }, { term: { 'tag' => 'bar' } }], [], []]
    #
    #   # When '-tag:foo'
    #   create_bool_filters([...])
    #   # => [[], [], [{ term: { 'tag' => 'foo' } }]]
    #
    #   # Suppose @hierarchy_fields contains 'tag'
    #
    #   # When 'tag:foo/'
    #   create_bool_filters([...])
    #   # => [[], [{ prefix: { 'tag' => 'foo/' } }, { term: { 'tag' => 'foo' } }], []]
    #
    #   # When '-tag:foo/'
    #   create_bool_filters([...])
    #   # => [[], [], [{ prefix: { 'tag' => 'foo/' } }, { term: { 'tag' => 'foo' } }]]
    #
    # Returns an Array consists of must, should and must_not filters arrays.
    def create_bool_filters(filter_tokens)
      must, should, must_not = [], [], []
      filter_tokens.each do |token|
        # A quoted filter value is split on whitespace; each word becomes
        # its own clause.
        token.term.split.each do |term|
          if @hierarchy_fields.include?(token.field) && term.end_with?('/')
            cond = token.minus? ? must_not : should
            cond << { prefix: { token.field => term.downcase } }
            # Exactly matches to the tag.
            cond << { term: { token.field => term[0...-1].downcase } }
          else
            cond = token.minus? ? must_not : must
            cond << { term: { token.field => term.downcase } }
          end
        end
      end
      [must, should, must_not]
    end
  end
end
|
data/lib/es-query-builder/token.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
class EsQueryBuilder
  # Internal: A single lexical unit of a query string. It keeps the matched
  # text, the optional field and term, whether it was negated, and its kind.
  class Token
    # The kinds a token can have; a "<kind>?" predicate exists for each.
    TYPE_KINDS = %i(query filter or).freeze

    attr_reader :full, :field, :term

    # Public: Construct a token.
    #
    # full  - The whole matched text (default: nil).
    # minus - Any truthy value marks the token as negated (default: nil).
    # field - The field name or nil (default: nil).
    # term  - The term text (default: nil).
    # type  - One of TYPE_KINDS or nil (default: nil).
    def initialize(full: nil, minus: nil, field: nil, term: nil, type: nil)
      @full, @field, @term, @type = full, field, term, type
      # Normalise to a strict boolean.
      @minus = minus ? true : false
    end

    # Public: Whether the token was negated.
    #
    # Returns true or false.
    def minus?
      @minus
    end

    # Defines query?, filter? and or?; each compares against the token's type.
    TYPE_KINDS.each do |kind|
      define_method(:"#{kind}?") { @type == kind }
    end
  end
end
|
data/lib/es-query-builder/tokenizer.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
class EsQueryBuilder
  # Internal: Splits a query string into Tokens and classifies each one as a
  # query, a filter or an OR connector.
  class Tokenizer
    # Captures, in order: the full token, the minus sign, the field name,
    # the content of a quoted term and an unquoted term.
    QUERY_REGEXP = /
      (
        (-)?                 # Minus
        (?:(\w+):)?          # Field
        (?:
          (?:"(.*?)(?<!\\)") # Quoted query
          |
          ([^\s]+)           # Single query
        )
      )
    /x

    # Anchored to the whole token (\A and \z), not to a line within it.
    OR_CONDITION = /\AOR\z/i

    # Public: Construct the tokenizer object.
    #
    # query_fields  - An Array of Strings specifying the fields allowed for
    #                 querying (default: []).
    # filter_fields - An Array of Strings specifying the fields allowed for
    #                 filtering (default: []).
    #
    # Returns nothing.
    def initialize(query_fields = [], filter_fields = [])
      @query_fields  = query_fields
      @filter_fields = filter_fields
    end

    # Public: Tokenize the given query string for parsing it later.
    #
    # query_string - The utf8 encoded String.
    #
    # Examples
    #
    #   tokenize('hello OR tag:world')
    #   # => [<Token: @full="hello", @type=:query, ...>,
    #         <Token: @full="OR", @type=:or, ...>,
    #         <Token: @full="tag:world", @type=:filter, ...>]
    #
    # Returns an Array of Tokens.
    def tokenize(query_string)
      query_string.scan(QUERY_REGEXP).map do |match|
        create_token(*match)
      end
    end

    private

    # Internal: Build a Token from the captures of QUERY_REGEXP.
    #
    # full   - The whole matched String.
    # minus  - '-' when the token is negated, otherwise nil.
    # field  - The field name String or nil.
    # quoted - The content of a quoted term or nil.
    # simple - The unquoted term or nil.
    #
    # Returns a Token.
    def create_token(full, minus, field, quoted, simple)
      if @filter_fields.include?(field)
        type = :filter
      elsif OR_CONDITION =~ full
        type = :or
      else
        # Fields that are not allowed are discarded, so the term is searched
        # without a field.
        field = nil unless @query_fields.include?(field)
        type = :query
      end

      Token.new(
        full: full,
        minus: minus,
        field: field,
        term: quoted || simple,
        type: type
      )
    end
  end
end
|
data/lib/es-query-builder/version.rb
ADDED
data/spec/es_query_builder_spec.rb
ADDED
@@ -0,0 +1,496 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe EsQueryBuilder do
|
4
|
+
let(:query_builder) do
|
5
|
+
described_class.new(param)
|
6
|
+
end
|
7
|
+
|
8
|
+
let(:param) do
|
9
|
+
{}
|
10
|
+
end
|
11
|
+
|
12
|
+
describe '#build' do
|
13
|
+
subject do
|
14
|
+
query_builder.build(query_string)
|
15
|
+
end
|
16
|
+
|
17
|
+
context 'when a term query is given' do
|
18
|
+
let(:query_string) do
|
19
|
+
term
|
20
|
+
end
|
21
|
+
|
22
|
+
let(:term) do
|
23
|
+
'hello'
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'returns a match query' do
|
27
|
+
should eq(
|
28
|
+
match: {
|
29
|
+
'_all' => term
|
30
|
+
}
|
31
|
+
)
|
32
|
+
end
|
33
|
+
|
34
|
+
context 'and the query starts with a minus character' do
|
35
|
+
let(:query_string) do
|
36
|
+
'-' + term
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'returns a bool query with must_not condition' do
|
40
|
+
should eq(
|
41
|
+
bool: {
|
42
|
+
must_not: [
|
43
|
+
{
|
44
|
+
match: {
|
45
|
+
'_all' => term
|
46
|
+
}
|
47
|
+
}
|
48
|
+
]
|
49
|
+
}
|
50
|
+
)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
context 'and it is constructed with all_query_fields' do
|
55
|
+
let(:param) do
|
56
|
+
{ all_query_fields: all_query_fields }
|
57
|
+
end
|
58
|
+
|
59
|
+
let(:all_query_fields) do
|
60
|
+
['field']
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'returns a bool query for the specified query fields' do
|
64
|
+
should eq(
|
65
|
+
multi_match: {
|
66
|
+
fields: all_query_fields,
|
67
|
+
query: term
|
68
|
+
}
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
context 'and the query starts with a minus char' do
|
73
|
+
let(:query_string) do
|
74
|
+
'-' + term
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'returns a must_not query for the specified query fields' do
|
78
|
+
should eq(
|
79
|
+
bool: {
|
80
|
+
must_not: [
|
81
|
+
{
|
82
|
+
multi_match: {
|
83
|
+
fields: all_query_fields,
|
84
|
+
query: term
|
85
|
+
}
|
86
|
+
}
|
87
|
+
]
|
88
|
+
}
|
89
|
+
)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
context 'when term queries are given' do
|
96
|
+
let(:query_string) do
|
97
|
+
"#{term_1} #{term_2}"
|
98
|
+
end
|
99
|
+
|
100
|
+
let(:term_1) do
|
101
|
+
'hello'
|
102
|
+
end
|
103
|
+
|
104
|
+
let(:term_2) do
|
105
|
+
'world'
|
106
|
+
end
|
107
|
+
|
108
|
+
it 'returns a bool query with must match queries' do
|
109
|
+
should eq(
|
110
|
+
bool: {
|
111
|
+
must: [
|
112
|
+
{
|
113
|
+
match: {
|
114
|
+
'_all' => term_1
|
115
|
+
}
|
116
|
+
},
|
117
|
+
{
|
118
|
+
match: {
|
119
|
+
'_all' => term_2
|
120
|
+
}
|
121
|
+
}
|
122
|
+
]
|
123
|
+
}
|
124
|
+
)
|
125
|
+
end
|
126
|
+
|
127
|
+
context 'and one query starts with a minus character' do
|
128
|
+
let(:query_string) do
|
129
|
+
"#{term_1} -#{term_2}"
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'returns a bool query with must and must_not queries' do
|
133
|
+
should eq(
|
134
|
+
bool: {
|
135
|
+
must: [
|
136
|
+
{
|
137
|
+
match: {
|
138
|
+
'_all' => term_1
|
139
|
+
}
|
140
|
+
}
|
141
|
+
],
|
142
|
+
must_not: [
|
143
|
+
{
|
144
|
+
match: {
|
145
|
+
'_all' => term_2
|
146
|
+
}
|
147
|
+
}
|
148
|
+
]
|
149
|
+
}
|
150
|
+
)
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
context 'and both of them start with a minus character' do
|
155
|
+
let(:query_string) do
|
156
|
+
"-#{term_1} -#{term_2}"
|
157
|
+
end
|
158
|
+
|
159
|
+
it 'returns a bool query with must_not queries' do
|
160
|
+
should eq(
|
161
|
+
bool: {
|
162
|
+
must_not: [
|
163
|
+
{
|
164
|
+
match: {
|
165
|
+
'_all' => term_1
|
166
|
+
}
|
167
|
+
},
|
168
|
+
{
|
169
|
+
match: {
|
170
|
+
'_all' => term_2
|
171
|
+
}
|
172
|
+
}
|
173
|
+
]
|
174
|
+
}
|
175
|
+
)
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
context 'and one query is a filter query' do
|
180
|
+
let(:query_string) do
|
181
|
+
"#{field1}:#{term_1} #{term_2}"
|
182
|
+
end
|
183
|
+
|
184
|
+
let(:param) do
|
185
|
+
{ filter_fields: [field1] }
|
186
|
+
end
|
187
|
+
|
188
|
+
let(:field1) do
|
189
|
+
'field'
|
190
|
+
end
|
191
|
+
|
192
|
+
it 'returns a filtered query' do
|
193
|
+
should eq(
|
194
|
+
filtered: {
|
195
|
+
query: {
|
196
|
+
match: {
|
197
|
+
'_all' => term_2
|
198
|
+
}
|
199
|
+
},
|
200
|
+
filter: {
|
201
|
+
term: {
|
202
|
+
field1 => term_1
|
203
|
+
}
|
204
|
+
}
|
205
|
+
}
|
206
|
+
)
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
context 'when a field query is given' do
|
212
|
+
let(:query_string) do
|
213
|
+
"#{field}:#{field_query}"
|
214
|
+
end
|
215
|
+
|
216
|
+
let(:field) do
|
217
|
+
'tag'
|
218
|
+
end
|
219
|
+
|
220
|
+
let(:field_query) do
|
221
|
+
'hello'
|
222
|
+
end
|
223
|
+
|
224
|
+
context 'and it is a part of query_fields' do
|
225
|
+
let(:param) do
|
226
|
+
{ query_fields: [field] }
|
227
|
+
end
|
228
|
+
|
229
|
+
it 'returns a match query for the field' do
|
230
|
+
should eq(
|
231
|
+
match: {
|
232
|
+
field => field_query
|
233
|
+
}
|
234
|
+
)
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
context 'and it is a part of filter_fields' do
|
239
|
+
let(:param) do
|
240
|
+
{ filter_fields: [field] }
|
241
|
+
end
|
242
|
+
|
243
|
+
it 'returns a filtered query object' do
|
244
|
+
should eq(
|
245
|
+
filtered: {
|
246
|
+
query: {
|
247
|
+
match_all: {}
|
248
|
+
},
|
249
|
+
filter: {
|
250
|
+
term: {
|
251
|
+
field => field_query
|
252
|
+
}
|
253
|
+
}
|
254
|
+
}
|
255
|
+
)
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
context 'and the field is a part of neither query_fields nor filter_fields' do
|
260
|
+
let(:param) do
|
261
|
+
{ all_query_fields: all_query_fields }
|
262
|
+
end
|
263
|
+
|
264
|
+
let(:all_query_fields) do
|
265
|
+
["foo_#{field}", "bar_#{field}"]
|
266
|
+
end
|
267
|
+
|
268
|
+
it 'returns a match query for all_query_fields' do
|
269
|
+
should eq(
|
270
|
+
multi_match: {
|
271
|
+
fields: all_query_fields,
|
272
|
+
query: field_query
|
273
|
+
}
|
274
|
+
)
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
context 'when a quoted query is given' do
|
280
|
+
let(:query_string) do
|
281
|
+
%("#{quoted_query}")
|
282
|
+
end
|
283
|
+
|
284
|
+
let(:quoted_query) do
|
285
|
+
'hello world'
|
286
|
+
end
|
287
|
+
|
288
|
+
it 'returns a match query with the whole quoted query' do
|
289
|
+
should eq(
|
290
|
+
match: {
|
291
|
+
'_all' => quoted_query
|
292
|
+
}
|
293
|
+
)
|
294
|
+
end
|
295
|
+
|
296
|
+
context 'and it is constructed with all_query_fields' do
|
297
|
+
let(:param) do
|
298
|
+
{ all_query_fields: all_query_fields }
|
299
|
+
end
|
300
|
+
|
301
|
+
let(:all_query_fields) do
|
302
|
+
%w(foo bar)
|
303
|
+
end
|
304
|
+
|
305
|
+
it 'returns a multi match query with the whole quoted query' do
|
306
|
+
should eq(
|
307
|
+
multi_match: {
|
308
|
+
fields: all_query_fields,
|
309
|
+
query: quoted_query
|
310
|
+
}
|
311
|
+
)
|
312
|
+
end
|
313
|
+
end
|
314
|
+
end
|
315
|
+
|
316
|
+
context 'when a quoted field query is given' do
|
317
|
+
let(:query_string) do
|
318
|
+
"#{field}:\"#{quoted_query}\""
|
319
|
+
end
|
320
|
+
|
321
|
+
let(:field) do
|
322
|
+
'field'
|
323
|
+
end
|
324
|
+
|
325
|
+
let(:quoted_query) do
|
326
|
+
"#{term_1} #{term_2}"
|
327
|
+
end
|
328
|
+
|
329
|
+
let(:term_1) do
|
330
|
+
'hello'
|
331
|
+
end
|
332
|
+
|
333
|
+
let(:term_2) do
|
334
|
+
'world'
|
335
|
+
end
|
336
|
+
|
337
|
+
context 'and the field is a part of query_fields' do
|
338
|
+
let(:param) do
|
339
|
+
{ query_fields: [field] }
|
340
|
+
end
|
341
|
+
|
342
|
+
it 'returns a match query for the field' do
|
343
|
+
should eq(
|
344
|
+
match: {
|
345
|
+
field => quoted_query
|
346
|
+
}
|
347
|
+
)
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
context 'and the field is a part of filter_fields' do
|
352
|
+
let(:param) do
|
353
|
+
{ filter_fields: [field] }
|
354
|
+
end
|
355
|
+
|
356
|
+
it 'returns a filtered query with must conditions' do
|
357
|
+
should eq(
|
358
|
+
filtered: {
|
359
|
+
query: {
|
360
|
+
match_all: {}
|
361
|
+
},
|
362
|
+
filter: {
|
363
|
+
bool: {
|
364
|
+
must: [
|
365
|
+
{
|
366
|
+
term: {
|
367
|
+
field => term_1
|
368
|
+
}
|
369
|
+
},
|
370
|
+
{
|
371
|
+
term: {
|
372
|
+
field => term_2
|
373
|
+
}
|
374
|
+
}
|
375
|
+
],
|
376
|
+
_cache: true
|
377
|
+
}
|
378
|
+
}
|
379
|
+
}
|
380
|
+
)
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
384
|
+
context 'and the field is a part of neither query_fields nor filter_fields' do
|
385
|
+
it 'returns a match query for _all field' do
|
386
|
+
should eq(
|
387
|
+
match: {
|
388
|
+
'_all' => quoted_query
|
389
|
+
}
|
390
|
+
)
|
391
|
+
end
|
392
|
+
|
393
|
+
context 'and the query includes "OR"' do
|
394
|
+
let(:quoted_query) do
|
395
|
+
"#{term_1} OR #{term_2}"
|
396
|
+
end
|
397
|
+
|
398
|
+
it 'returns a match query inclues "OR" for _all field' do
|
399
|
+
should eq(
|
400
|
+
match: {
|
401
|
+
'_all' => quoted_query
|
402
|
+
}
|
403
|
+
)
|
404
|
+
end
|
405
|
+
end
|
406
|
+
end
|
407
|
+
end
|
408
|
+
|
409
|
+
context 'when an OR condition is given' do
|
410
|
+
context 'and that is all' do
|
411
|
+
let(:query_string) do
|
412
|
+
'OR'
|
413
|
+
end
|
414
|
+
|
415
|
+
it { should be_nil }
|
416
|
+
end
|
417
|
+
|
418
|
+
context 'and it does not connect queries' do
  # The hashes returned by #build use Symbol keys, so the key has to be
  # checked as :bool; a String 'bool' can never be present, which would
  # make these negative expectations pass unconditionally.
  it 'does not returns a bool query' do
    expect(query_builder.build('OR term')).not_to have_key :bool
    expect(query_builder.build('term OR')).not_to have_key :bool
  end
end
|
424
|
+
|
425
|
+
context 'and it connects queries' do
|
426
|
+
let(:query_string) do
|
427
|
+
"#{term_1} #{field_2}:#{term_2} OR #{field_3}:#{term_3}"
|
428
|
+
end
|
429
|
+
|
430
|
+
let(:param) do
|
431
|
+
{
|
432
|
+
query_fields: [field_2],
|
433
|
+
filter_fields: [field_3]
|
434
|
+
}
|
435
|
+
end
|
436
|
+
|
437
|
+
let(:term_1) do
|
438
|
+
'hello'
|
439
|
+
end
|
440
|
+
|
441
|
+
let(:field_2) do
|
442
|
+
'title'
|
443
|
+
end
|
444
|
+
|
445
|
+
let(:term_2) do
|
446
|
+
'world'
|
447
|
+
end
|
448
|
+
|
449
|
+
let(:field_3) do
|
450
|
+
'user'
|
451
|
+
end
|
452
|
+
|
453
|
+
let(:term_3) do
|
454
|
+
'qiitan'
|
455
|
+
end
|
456
|
+
|
457
|
+
it 'returns a bool query with should conditions' do
|
458
|
+
should eq(
|
459
|
+
bool: {
|
460
|
+
should: [
|
461
|
+
{
|
462
|
+
bool: {
|
463
|
+
must: [
|
464
|
+
{
|
465
|
+
match: {
|
466
|
+
'_all' => term_1
|
467
|
+
},
|
468
|
+
},
|
469
|
+
{
|
470
|
+
match: {
|
471
|
+
field_2 => term_2
|
472
|
+
}
|
473
|
+
}
|
474
|
+
]
|
475
|
+
}
|
476
|
+
},
|
477
|
+
{
|
478
|
+
filtered: {
|
479
|
+
query: {
|
480
|
+
match_all: {}
|
481
|
+
},
|
482
|
+
filter: {
|
483
|
+
term: {
|
484
|
+
field_3 => term_3
|
485
|
+
}
|
486
|
+
}
|
487
|
+
}
|
488
|
+
}
|
489
|
+
]
|
490
|
+
}
|
491
|
+
)
|
492
|
+
end
|
493
|
+
end
|
494
|
+
end
|
495
|
+
end
|
496
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: es-query-builder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yuku Takahashi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- yuku@qiita.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".travis.yml"
|
64
|
+
- Gemfile
|
65
|
+
- LICENSE.txt
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- es-query-builder.gemspec
|
69
|
+
- lib/es-query-builder.rb
|
70
|
+
- lib/es-query-builder/parser.rb
|
71
|
+
- lib/es-query-builder/token.rb
|
72
|
+
- lib/es-query-builder/tokenizer.rb
|
73
|
+
- lib/es-query-builder/version.rb
|
74
|
+
- spec/es_query_builder_spec.rb
|
75
|
+
- spec/spec_helper.rb
|
76
|
+
homepage: https://github.com/increments/es-query-builder
|
77
|
+
licenses:
|
78
|
+
- MIT
|
79
|
+
metadata: {}
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.2.2
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: Build a query hash by a simple query string
|
100
|
+
test_files:
|
101
|
+
- spec/es_query_builder_spec.rb
|
102
|
+
- spec/spec_helper.rb
|