plunk 0.2.11 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/AUTHORS +1 -0
- data/Gemfile.lock +1 -1
- data/lib/plunk.rb +6 -1
- data/lib/plunk/helper.rb +55 -0
- data/lib/plunk/parser.rb +70 -48
- data/lib/plunk/result_set.rb +4 -49
- data/lib/plunk/transformer.rb +33 -114
- data/plunk.gemspec +9 -9
- data/spec/basic_spec.rb +30 -8
- data/spec/boolean_spec.rb +73 -27
- data/spec/chained_search_spec.rb +36 -44
- data/spec/field_value_spec.rb +28 -29
- data/spec/last_spec.rb +52 -49
- data/spec/nested_search_spec.rb +7 -7
- data/spec/regexp_spec.rb +16 -12
- data/spec/shared/dummy_client.rb +14 -0
- data/spec/spec_helper.rb +7 -2
- metadata +55 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb14cb961c9f1fcfadaeefc60ca753cb061bf2eb
|
4
|
+
data.tar.gz: ee823ac07dfcb711d8d35becf70aea8a4f3cde05
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d77fa8591e0c76809bbec4e24d408073836dd517bd35fad324ed389826fa02d4a989933e2fc1c71c5a61628dfea50c81416ca3e20333789b4a5e73a676c2810d
|
7
|
+
data.tar.gz: 6c76177d7f17f571f9f6ad46808ea6c99b0c2eadd4fc87256345e44d5111ebad5ca29171b5484676ad16ba3d13c1089c962c638a1d1016568616c2515afbc50f
|
data/AUTHORS
CHANGED
data/Gemfile.lock
CHANGED
data/lib/plunk.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'elasticsearch'
|
2
2
|
|
3
|
+
require 'plunk/helper'
|
3
4
|
require 'plunk/utils'
|
4
5
|
require 'plunk/parser'
|
5
6
|
require 'plunk/transformer'
|
@@ -32,6 +33,10 @@ module Plunk
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def self.search(query_string)
|
35
|
-
|
36
|
+
ResultSet.new(
|
37
|
+
transformer.apply(
|
38
|
+
parser.parse(query_string)
|
39
|
+
)
|
40
|
+
).eval
|
36
41
|
end
|
37
42
|
end
|
data/lib/plunk/helper.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'active_support/core_ext'
|
2
|
+
|
3
|
+
module Plunk
  # Stateless builders for the hash fragments that make up a search request
  # body, plus small time helpers used when translating relative time
  # expressions such as "last 24h".
  class Helper
    # Wraps a query string in a `query_string` query.
    #
    # @param query_string [String] the query text to embed
    # @return [Hash] `{ query: { query_string: { query: query_string } } }`
    def self.query_builder(query_string)
      {
        query: {
          query_string: {
            query: query_string
          }
        }
      }
    end

    # Wraps a filter in a `filtered` query.
    #
    # @param filter [Hash] the filter to embed
    # @return [Hash] `{ query: { filtered: { filter: filter } } }`
    def self.filter_builder(filter)
      {
        query: {
          filtered: {
            filter: filter
          }
        }
      }
    end

    # Builds a range clause on the configured timestamp field
    # (`Plunk.timestamp_field`, defined outside this class).
    #
    # @param range_min [Object] inclusive lower bound, emitted as `gte`
    # @param range_max [Object] inclusive upper bound, emitted as `lte`
    # @return [Hash] `{ range: { <timestamp field> => { gte:, lte: } } }`
    def self.range_builder(range_min, range_max)
      {
        range: {
          Plunk.timestamp_field => {
            gte: range_min,
            # Must be `lte`: a second `gte` key would overwrite the lower
            # bound and leave the range open-ended at the top.
            lte: range_max
          }
        }
      }
    end

    # Converts a relative time expression into the point in time that far in
    # the past. Relies on ActiveSupport's numeric duration extensions
    # (`seconds`, `minutes`, ... and `ago`).
    #
    # @param int_quantity [Integer] how many units to go back
    # @param quantifier [String] unit: 's', 'm', 'h', 'd' or 'w'
    # @return [Object, nil] the result of `<duration>.ago`; nil when the
    #   quantifier is not one of the supported units
    def self.time_query_to_timestamp(int_quantity, quantifier)
      case quantifier
      when 's'
        int_quantity.seconds.ago
      when 'm'
        int_quantity.minutes.ago
      when 'h'
        int_quantity.hours.ago
      when 'd'
        int_quantity.days.ago
      when 'w'
        int_quantity.weeks.ago
      end
    end

    # Formats a time as an ISO 8601 string in UTC with millisecond precision.
    #
    # NOTE: for a plain Time, `#utc` switches the receiver itself to UTC.
    #
    # @param time [#utc] the time to format
    # @return [String] e.g. "1970-01-01T00:00:00.000+00:00"
    def self.timestamp_format(time)
      time.utc.to_datetime.iso8601(3)
    end
  end
end
|
data/lib/plunk/parser.rb
CHANGED
@@ -3,14 +3,11 @@ require 'parslet'
|
|
3
3
|
module Plunk
|
4
4
|
class Parser < Parslet::Parser
|
5
5
|
|
6
|
-
|
7
|
-
lparen >> atom >> rparen
|
8
|
-
end
|
6
|
+
# BUILDING BLOCKS
|
9
7
|
|
10
8
|
# Single character rules
|
11
9
|
rule(:lparen) { str('(') >> space? }
|
12
10
|
rule(:rparen) { str(')') >> space? }
|
13
|
-
rule(:comma) { str(',') >> space? }
|
14
11
|
rule(:digit) { match('[0-9]') }
|
15
12
|
rule(:space) { match('\s').repeat(1) }
|
16
13
|
rule(:space?) { space.maybe }
|
@@ -21,6 +18,8 @@ module Plunk
|
|
21
18
|
str('-').maybe >> digit.repeat(1) >> str('.') >> digit.repeat(1) >> space?
|
22
19
|
}
|
23
20
|
rule(:number) { integer | float }
|
21
|
+
|
22
|
+
# Dates
|
24
23
|
rule(:datetime) {
|
25
24
|
# 1979-05-27T07:32:00Z
|
26
25
|
digit.repeat(4) >> str("-") >>
|
@@ -30,10 +29,11 @@ module Plunk
|
|
30
29
|
digit.repeat(2) >> str(":") >>
|
31
30
|
digit.repeat(2) >> str("Z")
|
32
31
|
}
|
32
|
+
|
33
|
+
# Strings
|
33
34
|
rule(:escaped_special) {
|
34
35
|
str("\\") >> match['0tnr"\\\\']
|
35
36
|
}
|
36
|
-
|
37
37
|
rule(:string_special) {
|
38
38
|
match['\0\t\n\r"\\\\']
|
39
39
|
}
|
@@ -43,79 +43,101 @@ module Plunk
|
|
43
43
|
str('"')
|
44
44
|
}
|
45
45
|
|
46
|
-
#
|
47
|
-
rule(:
|
48
|
-
|
49
|
-
rule(:
|
50
|
-
rule(:searchop) { match('[=]').as(:op) }
|
46
|
+
# Booleans
|
47
|
+
rule(:and_operator) { (str('and') | str('AND') | str('&')) >> space? }
|
48
|
+
rule(:or_operator) { (str('or') | str('OR') | str('|')) >> space? }
|
49
|
+
rule(:not_operator) { (str('not') | str('NOT') | str('~')) >> space? }
|
51
50
|
|
52
|
-
rule(:query_value) { string | wildcard | datetime | number }
|
53
51
|
|
54
|
-
#
|
55
|
-
rule(:concatop) { (str('OR') | str('AND')) >> space? }
|
56
|
-
rule(:negateop) { str('NOT') >> space? }
|
57
|
-
rule(:operator) { match('[|]').as(:op) >> space? }
|
58
|
-
rule(:timerange) {
|
59
|
-
integer.as(:quantity) >> match('s|m|h|d|w').as(:quantifier)
|
60
|
-
}
|
52
|
+
# COMMANDS
|
61
53
|
|
62
|
-
#
|
63
|
-
rule(:
|
64
|
-
|
54
|
+
# Command parts
|
55
|
+
rule(:identifier) { match('[^=\s)(|]').repeat(1) >> match('[^=\s]').repeat }
|
56
|
+
rule(:wildcard) {
|
57
|
+
(lparen >> wildcard >> rparen) |
|
58
|
+
match('[^=\s|)(]').repeat(1)
|
65
59
|
}
|
66
|
-
|
67
|
-
rule(:
|
68
|
-
|
60
|
+
rule(:query_value) { string | wildcard | datetime | number }
|
61
|
+
rule(:searchop) { match['='] }
|
62
|
+
rule(:rhs) {
|
63
|
+
regexp | query_value
|
69
64
|
}
|
70
65
|
|
71
|
-
#
|
72
|
-
rule(:
|
73
|
-
|
66
|
+
# Field = Value
|
67
|
+
rule(:field_value) {
|
68
|
+
identifier.as(:field) >> space? >>
|
69
|
+
searchop >> space? >>
|
70
|
+
(rhs.as(:value) | subsearch.as(:subsearch))
|
74
71
|
}
|
75
72
|
|
76
|
-
#
|
77
|
-
rule(:
|
78
|
-
|
79
|
-
}
|
80
|
-
rule(:booleanparen) {
|
81
|
-
lparen >> space? >> booleanop >> space? >> rparen
|
73
|
+
# Value-only
|
74
|
+
rule(:value_only) {
|
75
|
+
rhs.as(:value)
|
82
76
|
}
|
83
77
|
|
78
|
+
# Regexp
|
84
79
|
rule(:regexp) {
|
85
80
|
str('/') >> (str('\/') | match('[^/]')).repeat >> str('/')
|
86
81
|
}
|
87
82
|
|
83
|
+
# Last
|
88
84
|
rule(:last) {
|
89
|
-
str(
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
rule(:search) {
|
94
|
-
identifier.as(:field) >> space? >> searchop >> space? >>
|
95
|
-
rhs.as(:value) | rhs.as(:match)
|
85
|
+
str('last') >>
|
86
|
+
space >>
|
87
|
+
integer.as(:quantity) >>
|
88
|
+
match('s|m|h|d|w').as(:quantifier)
|
96
89
|
}
|
97
90
|
|
91
|
+
# Subsearch
|
98
92
|
rule(:subsearch) {
|
99
93
|
str('`') >> space? >> nested_search >> str('`')
|
100
94
|
}
|
101
|
-
|
102
95
|
rule(:nested_search) {
|
103
|
-
|
96
|
+
plunk_query.as(:initial_query) >> space? >> str('|') >> space? >>
|
104
97
|
match('[^`]').repeat.as(:extractors)
|
105
98
|
}
|
106
99
|
|
107
|
-
|
108
|
-
|
100
|
+
# Reference your custom commands here to make them eligible for parsing
|
101
|
+
# NOTE: order matters!
|
102
|
+
rule(:command) {
|
103
|
+
(
|
104
|
+
last |
|
105
|
+
field_value |
|
106
|
+
value_only
|
107
|
+
).as(:command) >> space?
|
109
108
|
}
|
110
109
|
|
111
|
-
|
112
|
-
|
110
|
+
|
111
|
+
# QUERY JOINING
|
112
|
+
|
113
|
+
rule(:negated_command) {
|
114
|
+
(not_operator >> command.as(:not)) |
|
115
|
+
command
|
116
|
+
}
|
117
|
+
rule(:primary) { lparen >> or_operation >> rparen | negated_command }
|
118
|
+
|
119
|
+
rule(:negated_and) {
|
120
|
+
(not_operator >> and_operation.as(:not)) |
|
121
|
+
and_operation
|
122
|
+
}
|
123
|
+
rule(:and_operation) {
|
124
|
+
(primary.as(:left) >> and_operator >>
|
125
|
+
negated_and.as(:right)).as(:and) |
|
126
|
+
primary }
|
127
|
+
|
128
|
+
rule(:negated_or) {
|
129
|
+
(not_operator >> or_operation.as(:not)) |
|
130
|
+
or_operation
|
113
131
|
}
|
132
|
+
rule(:or_operation) {
|
133
|
+
(and_operation.as(:left) >> or_operator >>
|
134
|
+
negated_or.as(:right)).as(:or) |
|
135
|
+
and_operation }
|
114
136
|
|
115
137
|
rule(:plunk_query) {
|
116
|
-
space? >>
|
138
|
+
space? >> or_operation >> space?
|
117
139
|
}
|
118
140
|
|
119
|
-
root
|
141
|
+
root(:plunk_query)
|
120
142
|
end
|
121
143
|
end
|
data/lib/plunk/result_set.rb
CHANGED
@@ -2,60 +2,15 @@ module Plunk
|
|
2
2
|
class ResultSet
|
3
3
|
attr_accessor :query, :query_string
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@query = { query: { filtered: {}}}
|
7
|
-
|
8
|
-
if opts.size >= 3 # use "and" filter to AND filters
|
9
|
-
@query_string = opts[:query_string]
|
10
|
-
@query[:query][:filtered][:query] = {
|
11
|
-
query_string: {
|
12
|
-
query: opts[:query_string] }}
|
13
|
-
@query[:query][:filtered][:filter] = {
|
14
|
-
and: [
|
15
|
-
range: {
|
16
|
-
Plunk.timestamp_field => {
|
17
|
-
gte: opts[:start_time],
|
18
|
-
lte: opts[:end_time] }}]}
|
19
|
-
else
|
20
|
-
if @query_string = opts[:query_string]
|
21
|
-
@query[:query][:filtered][:query] = {
|
22
|
-
query_string: {
|
23
|
-
query: opts[:query_string] }}
|
24
|
-
elsif opts[:start_time] and opts[:end_time]
|
25
|
-
@query[:query][:filtered][:query] = {
|
26
|
-
range: {
|
27
|
-
Plunk.timestamp_field => {
|
28
|
-
gte: opts[:start_time],
|
29
|
-
lte: opts[:end_time] }}}
|
30
|
-
end
|
31
|
-
end
|
5
|
+
def initialize(filter)
|
6
|
+
@query = { query: { filtered: { filter: filter }}}
|
32
7
|
end
|
33
8
|
|
34
9
|
def eval
|
35
10
|
Plunk.elasticsearch_client.search(
|
36
|
-
body: @query
|
11
|
+
body: @query,
|
37
12
|
size: Plunk.max_number_of_hits || 10
|
38
|
-
)
|
39
|
-
end
|
40
|
-
|
41
|
-
# merges multiple queries with implicit AND
|
42
|
-
def self.merge(result_sets)
|
43
|
-
first = result_sets.delete_at 0
|
44
|
-
|
45
|
-
first.query[:query][:filtered][:filter] ||= {}
|
46
|
-
first.query[:query][:filtered][:filter][:and] ||= []
|
47
|
-
|
48
|
-
result_sets.each do |result_set|
|
49
|
-
first.query[:query][:filtered][:filter][:and] <<
|
50
|
-
result_set.query[:query][:filtered]
|
51
|
-
|
52
|
-
if result_set.query[:query][:filtered][:filter]
|
53
|
-
first.query[:query][:filtered][:filter][:and] +=
|
54
|
-
result_set.query[:query][:filtered][:filter][:and]
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
first
|
13
|
+
) if @query
|
59
14
|
end
|
60
15
|
end
|
61
16
|
end
|
data/lib/plunk/transformer.rb
CHANGED
@@ -1,135 +1,54 @@
|
|
1
1
|
require 'parslet'
|
2
|
-
require 'active_support/core_ext'
|
3
2
|
|
4
3
|
module Plunk
|
5
|
-
|
6
|
-
class Helper
|
7
|
-
def self.time_query_to_timestamp(int_quantity, quantifier)
|
8
|
-
case quantifier
|
9
|
-
when 's'
|
10
|
-
int_quantity.seconds.ago
|
11
|
-
when 'm'
|
12
|
-
int_quantity.minutes.ago
|
13
|
-
when 'h'
|
14
|
-
int_quantity.hours.ago
|
15
|
-
when 'd'
|
16
|
-
int_quantity.days.ago
|
17
|
-
when 'w'
|
18
|
-
int_quantity.weeks.ago
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.timestamp_format(time)
|
23
|
-
time.utc.to_datetime.iso8601(3)
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.time_range_hash(start_time, end_time)
|
27
|
-
{
|
28
|
-
start_time: Plunk::Helper.timestamp_format(start_time),
|
29
|
-
end_time: Plunk::Helper.timestamp_format(end_time)
|
30
|
-
}
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
4
|
class Transformer < Parslet::Transform
|
35
5
|
|
36
|
-
#
|
37
|
-
rule(
|
6
|
+
# Field = Value
|
7
|
+
rule(command: {
|
38
8
|
field: simple(:field),
|
39
|
-
value:
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
timerange: {
|
45
|
-
quantity: simple(:quantity),
|
46
|
-
quantifier: simple(:quantifier)
|
47
|
-
}) do
|
48
|
-
|
49
|
-
int_quantity = quantity.to_s.to_i
|
50
|
-
start_time = Plunk::Helper.time_query_to_timestamp(int_quantity, quantifier)
|
51
|
-
end_time = Time.now
|
52
|
-
|
53
|
-
# recursively apply nested query
|
54
|
-
result_set = Plunk::Transformer.new.apply(initial_query)
|
55
|
-
|
56
|
-
json = JSON.parse result_set.eval
|
57
|
-
values = Plunk::Utils.extract_values json, extractors.to_s.split(',')
|
58
|
-
|
59
|
-
result_set_params = Plunk::Helper.time_range_hash(start_time, end_time)
|
60
|
-
if values.empty?
|
61
|
-
result_set_params.merge!(query_string: "#{field}:(#{values.uniq.join(' OR ')})",)
|
62
|
-
end
|
63
|
-
Plunk::ResultSet.new(result_set_params)
|
9
|
+
value: simple(:value)
|
10
|
+
}) do
|
11
|
+
Helper.query_builder(
|
12
|
+
String(field) + ":" + String(value)
|
13
|
+
)
|
64
14
|
end
|
65
15
|
|
66
|
-
|
67
|
-
|
16
|
+
# Value-only
|
17
|
+
rule(command: { value: simple(:value) }) do
|
18
|
+
Helper.query_builder(String(value))
|
68
19
|
end
|
69
20
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
# recursively apply nested query
|
80
|
-
result_set = Transformer.new.apply(initial_query)
|
81
|
-
|
82
|
-
json = JSON.parse result_set.eval
|
83
|
-
values = Utils.extract_values json, extractors.to_s.split(',')
|
21
|
+
rule(command: {
|
22
|
+
quantity: simple(:quantity),
|
23
|
+
quantifier: simple(:quantifier)
|
24
|
+
}) do
|
25
|
+
start_timestamp = Helper.time_query_to_timestamp(
|
26
|
+
Integer(quantity),
|
27
|
+
String(quantifier)
|
28
|
+
)
|
84
29
|
|
85
|
-
|
86
|
-
|
87
|
-
else
|
88
|
-
ResultSet.new(query_string: "#{field}:(#{values.uniq.join(' OR ')})")
|
89
|
-
end
|
90
|
-
end
|
30
|
+
start_time = Helper.timestamp_format start_timestamp
|
31
|
+
end_time = Helper.timestamp_format(Time.now)
|
91
32
|
|
92
|
-
|
93
|
-
rule(field: simple(:field), value: simple(:value), op: '=') do
|
94
|
-
ResultSet.new(query_string: "#{field}:#{value}")
|
33
|
+
Helper.range_builder(start_time, end_time)
|
95
34
|
end
|
96
35
|
|
97
|
-
rule(
|
98
|
-
|
99
|
-
quantity: simple(:quantity),
|
100
|
-
quantifier: simple(:quantifier)
|
101
|
-
}) do
|
102
|
-
|
103
|
-
int_quantity = quantity.to_s.to_i
|
104
|
-
start_time = Plunk::Helper.time_query_to_timestamp(int_quantity, quantifier)
|
105
|
-
end_time = Time.now
|
106
|
-
|
107
|
-
result_set_params = Plunk::Helper.time_range_hash(start_time, end_time)
|
108
|
-
Plunk::ResultSet.new(result_set_params)
|
36
|
+
rule(:negate => subtree(:not)) do
|
37
|
+
{ not: negate }
|
109
38
|
end
|
110
39
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
quantifier: simple(:quantifier)
|
117
|
-
}) do
|
118
|
-
|
119
|
-
int_quantity = quantity.to_s.to_i
|
120
|
-
start_time = Plunk::Helper.time_query_to_timestamp(int_quantity, quantifier)
|
121
|
-
end_time = Time.now
|
122
|
-
|
123
|
-
result_set_params = Plunk::Helper.time_range_hash(start_time, end_time)
|
124
|
-
result_set_params.merge!(query_string: result_set.query_string)
|
125
|
-
Plunk::ResultSet.new(result_set_params)
|
40
|
+
rule(:or => {
|
41
|
+
left: subtree(:left),
|
42
|
+
right: subtree(:right)
|
43
|
+
}) do
|
44
|
+
{ or: [left, right] }
|
126
45
|
end
|
127
46
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
) do
|
132
|
-
|
47
|
+
rule(:and => {
|
48
|
+
left: subtree(:left),
|
49
|
+
right: subtree(:right)
|
50
|
+
}) do
|
51
|
+
{ and: [left, right] }
|
133
52
|
end
|
134
53
|
end
|
135
54
|
end
|