peruse 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.ruby-version +1 -0
- data/AUTHORS +2 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +63 -0
- data/LICENSE +20 -0
- data/README.md +118 -0
- data/Rakefile +5 -0
- data/bin/peruse +75 -0
- data/examples/simple.json +25 -0
- data/examples/simple.rb +10 -0
- data/lib/peruse.rb +53 -0
- data/lib/peruse/helper.rb +90 -0
- data/lib/peruse/parser.rb +181 -0
- data/lib/peruse/result_set.rb +16 -0
- data/lib/peruse/transformer.rb +103 -0
- data/lib/peruse/utils.rb +25 -0
- data/lib/peruse/version.rb +3 -0
- data/peruse.gemspec +23 -0
- data/spec/basic_spec.rb +39 -0
- data/spec/binstub_spec.rb +33 -0
- data/spec/boolean_spec.rb +112 -0
- data/spec/chained_search_spec.rb +40 -0
- data/spec/field_value_spec.rb +52 -0
- data/spec/indices_spec.rb +41 -0
- data/spec/last_spec.rb +77 -0
- data/spec/limit_spec.rb +11 -0
- data/spec/nested_search_spec.rb +82 -0
- data/spec/regexp_spec.rb +48 -0
- data/spec/shared/dummy_client.rb +14 -0
- data/spec/shared/peruse_stubs.rb +5 -0
- data/spec/shared/time_stubs.rb +12 -0
- data/spec/spec_helper.rb +28 -0
- data/spec/window_spec.rb +54 -0
- metadata +221 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 15b1475221dbde1e53dcce6955d47fc167274e62
|
4
|
+
data.tar.gz: d28ee6470fa23b902465727c41ea6aa642a3a5dd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f1072f67995b3698514c4002d2ada8cf34c52ee16eae3331160a2ba0cd3eaf1f2eb1b51f3bedc78f0f96f0b816400eb37fc19e0db16e249d6b31d17859fee92d
|
7
|
+
data.tar.gz: df29c59ec1b7915342e57cebf3c40a87b92b86dd7700f46d527f86cfe4e15551dbaa45fa02c635825979a22c2f48b7b53a35197b657b7ba16cd8d1b3dab2a261
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1
|
data/AUTHORS
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
plunk (0.3.11)
|
5
|
+
activesupport (~> 4.0, >= 4.0.0)
|
6
|
+
chronic (~> 0.10, >= 0.10.0)
|
7
|
+
elasticsearch (~> 1.0, >= 1.0.0)
|
8
|
+
json (~> 1.8, >= 1.8.0)
|
9
|
+
parslet (~> 1.5, >= 1.5.0)
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
activesupport (4.1.4)
|
15
|
+
i18n (~> 0.6, >= 0.6.9)
|
16
|
+
json (~> 1.7, >= 1.7.7)
|
17
|
+
minitest (~> 5.1)
|
18
|
+
thread_safe (~> 0.1)
|
19
|
+
tzinfo (~> 1.1)
|
20
|
+
blankslate (2.1.2.4)
|
21
|
+
chronic (0.10.2)
|
22
|
+
diff-lcs (1.2.5)
|
23
|
+
elasticsearch (1.0.4)
|
24
|
+
elasticsearch-api (= 1.0.4)
|
25
|
+
elasticsearch-transport (= 1.0.4)
|
26
|
+
elasticsearch-api (1.0.4)
|
27
|
+
multi_json
|
28
|
+
elasticsearch-transport (1.0.4)
|
29
|
+
faraday
|
30
|
+
multi_json
|
31
|
+
faraday (0.9.0)
|
32
|
+
multipart-post (>= 1.2, < 3)
|
33
|
+
i18n (0.6.11)
|
34
|
+
json (1.8.1)
|
35
|
+
minitest (5.4.0)
|
36
|
+
multi_json (1.10.1)
|
37
|
+
multipart-post (2.0.0)
|
38
|
+
parslet (1.6.1)
|
39
|
+
blankslate (~> 2.0)
|
40
|
+
rspec (3.1.0)
|
41
|
+
rspec-core (~> 3.1.0)
|
42
|
+
rspec-expectations (~> 3.1.0)
|
43
|
+
rspec-mocks (~> 3.1.0)
|
44
|
+
rspec-core (3.1.3)
|
45
|
+
rspec-support (~> 3.1.0)
|
46
|
+
rspec-expectations (3.1.1)
|
47
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
48
|
+
rspec-support (~> 3.1.0)
|
49
|
+
rspec-mocks (3.1.0)
|
50
|
+
rspec-support (~> 3.1.0)
|
51
|
+
rspec-support (3.1.0)
|
52
|
+
thread_safe (0.3.4)
|
53
|
+
timecop (0.7.1)
|
54
|
+
tzinfo (1.2.1)
|
55
|
+
thread_safe (~> 0.1)
|
56
|
+
|
57
|
+
PLATFORMS
|
58
|
+
ruby
|
59
|
+
|
60
|
+
DEPENDENCIES
|
61
|
+
plunk!
|
62
|
+
rspec (~> 3.1, >= 3.1.0)
|
63
|
+
timecop (~> 0.7, >= 0.7.1)
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Elbii
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
Note: Plunk has been renamed to Peruse
|
2
|
+
|
3
|
+
Peruse
|
4
|
+
======
|
5
|
+
|
6
|
+
Human-friendly query language for Elasticsearch
|
7
|
+
|
8
|
+
## About
|
9
|
+
|
10
|
+
Peruse is a ruby gem to take a human-friendly, one-line search command and
|
11
|
+
translate it to full-fledged JSON to send to Elasticsearch. Currently it only
|
12
|
+
supports a few commands, but the goal is to support a large subset of what
|
13
|
+
Elasticsearch offers.
|
14
|
+
|
15
|
+
## Installation
|
16
|
+
```
|
17
|
+
gem install peruse
|
18
|
+
```
|
19
|
+
|
20
|
+
Peruse uses [Parslet](https://github.com/kschiess/parslet) to first parse your
|
21
|
+
query, and then [Elasticsearch's official ruby library](https://github.com/elasticsearch/elasticsearch-ruby)
|
22
|
+
to send it to Elasticsearch.
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
```ruby
|
26
|
+
require 'peruse'
|
27
|
+
|
28
|
+
#
|
29
|
+
# Configuration is required before using Peruse
|
30
|
+
#
|
31
|
+
# elasticsearch_options accepts the same params as Elasticsearch::Client
|
32
|
+
# from the elasticsearch-ruby library
|
33
|
+
Peruse.configure do |config|
|
34
|
+
config.elasticsearch_options = { host: 'localhost' }
|
35
|
+
end
|
36
|
+
|
37
|
+
# Restrict timeframe to last 1 week and match documents with _type=syslog
|
38
|
+
# s = seconds
|
39
|
+
# m = minutes
|
40
|
+
# h = hours
|
41
|
+
# d = days
|
42
|
+
# w = weeks
|
43
|
+
# All times in Peruse are converted to UTC
|
44
|
+
Peruse.search 'last 1w AND _type = syslog'
|
45
|
+
|
46
|
+
# The ```window``` command can also be used to filter by time
|
47
|
+
Peruse.search 'window -2d to -1d'
|
48
|
+
|
49
|
+
# Peruse tries to parse the date with Chronic, so this works too. Note the
|
50
|
+
# double quotes around the time string. This is needed if it contains a space.
|
51
|
+
Peruse.search 'window "last monday" to "last thursday"'
|
52
|
+
|
53
|
+
# Of course, absolute dates are supported as well. Date format is American style
|
54
|
+
# e.g. MM/DD/YY
|
55
|
+
Peruse.search 'window 3/14/12 to 3/15/12'
|
56
|
+
|
57
|
+
# Use double quotes to wrap space-containing strings
|
58
|
+
Peruse.search 'http.header = "UserAgent: Mozilla/5.0"'
|
59
|
+
|
60
|
+
# Commands are joined using parenthesized booleans
|
61
|
+
Peruse.search '(last 1h AND severity = 5) OR (last 1w AND severity = 3)'
|
62
|
+
|
63
|
+
# "AND" is aliased to "and" and "&". Similarly, "OR" is aliased to "or" and "|".
|
64
|
+
# The following queries are identical to the one above
|
65
|
+
Peruse.search '(last 1h and severity = 5) or (last 1w and severity = 3)'
|
66
|
+
Peruse.search '(last 1h & severity = 5) | (last 1w & severity = 3)'
|
67
|
+
|
68
|
+
# Use the NOT keyword to negate the following command or boolean chain
|
69
|
+
Peruse.search 'NOT message = Error'
|
70
|
+
|
71
|
+
# Like AND and OR, "NOT" is aliased to "not" and "~"
|
72
|
+
Peruse.search 'not message = Error'
|
73
|
+
Peruse.search '~ message = Error'
|
74
|
+
|
75
|
+
# Regexp is supported as well
|
76
|
+
Peruse.search 'http.headers = /.*User-Agent: Mozilla.*/ OR http.headers = /.*application\/json.*/'
|
77
|
+
```
|
78
|
+
|
79
|
+
|
80
|
+
## Translation
|
81
|
+
|
82
|
+
Under the hood, Peruse takes your query and translates it to
|
83
|
+
Elasticsearch-compatible JSON. For example,
|
84
|
+
|
85
|
+
```last 24h & _type=syslog```
|
86
|
+
|
87
|
+
gets translated to:
|
88
|
+
|
89
|
+
```json
|
90
|
+
{
|
91
|
+
"query": {
|
92
|
+
"filtered": {
|
93
|
+
"filter": {
|
94
|
+
"and": [
|
95
|
+
{
|
96
|
+
"range": {
|
97
|
+
"timestamp": {
|
98
|
+
"gte": "2013-08-23T05:43:13.770Z",
|
99
|
+
"lte": "2013-08-24T05:43:13.770Z"
|
100
|
+
}
|
101
|
+
}
|
102
|
+
},
|
103
|
+
{
|
104
|
+
"query": {
|
105
|
+
"query_string": {
|
106
|
+
"query": "_type:syslog"
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
]
|
111
|
+
}
|
112
|
+
}
|
113
|
+
}
|
114
|
+
}
|
115
|
+
```
|
116
|
+
|
117
|
+
In general, commands are combined into a single filter using Elasticsearch's
|
118
|
+
```and```, ```or```, and ```not``` filters.
|
data/Rakefile
ADDED
data/bin/peruse
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH << './lib'
|
3
|
+
require 'peruse'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Command-line options gathered by OptionParser. A key is present only when
# the corresponding flag was given on the command line.
options = {}

OptionParser.new do |opts|
  opts.banner = "Usage: peruse [options]"
  opts.separator ""
  opts.separator "Options"

  opts.on(
    "-h",
    "--host HOST",
    "comma-separated list of Elasticsearch hosts to use"
  ) { |value| options[:host] = value }

  opts.on(
    "-p",
    "--parse-only",
    "parse but don't execute query, returning ES-compatible JSON"
  ) { |value| options[:parse_only] = value }

  opts.on(
    "-s",
    "--size SIZE",
    "max number of hits to return"
  ) { |value| options[:size] = value }

  opts.on(
    "-r",
    "--randomize-hosts",
    "randomize hosts used for each search"
  ) { |value| options[:randomize_hosts] = value }

  opts.on(
    "-t",
    "--timestamp-field FIELD",
    "timestamp field to use for timerange searches"
  ) { |value| options[:timestamp_field] = value }

  opts.on(
    "-d",
    "--debug",
    "turn on debugging output"
  ) { |value| options[:debug] = value }
end.parse!

Peruse.configure do |c|
  c.parse_only = options[:parse_only]
  c.max_number_of_hits = options[:size].to_i if options[:size]
  # The guard previously tested the misspelled key :timstamp_field, so the
  # --timestamp-field flag never took effect.
  c.timestamp_field = options[:timestamp_field].strip if options[:timestamp_field]
  c.logger = Logger.new(STDOUT) if options[:debug]

  # A client is only needed when the query is actually executed.
  unless c.parse_only
    client_options = { randomize_hosts: options[:randomize_hosts] }
    # Only pass :host when -h was supplied; calling split on a missing host
    # used to raise NoMethodError. Without :host the client library picks its
    # own default host.
    client_options[:host] = options[:host].split(',').map(&:strip) if options[:host]
    c.elasticsearch_client = Elasticsearch::Client.new(client_options)
  end
end

# The query is read from standard input and the result is printed as JSON.
puts Peruse.search($stdin.read).to_json
|
@@ -0,0 +1,25 @@
|
|
1
|
+
{
|
2
|
+
"query":{
|
3
|
+
"filtered":{
|
4
|
+
"filter":{
|
5
|
+
"and":[
|
6
|
+
{
|
7
|
+
"range":{
|
8
|
+
"timestamp":{
|
9
|
+
"gte":"2014-04-01T16:00:00.000+00:00",
|
10
|
+
"lte":"2014-04-07T16:00:00.000+00:00"
|
11
|
+
}
|
12
|
+
}
|
13
|
+
},
|
14
|
+
{
|
15
|
+
"query":{
|
16
|
+
"query_string":{
|
17
|
+
"query":"_type:syslog"
|
18
|
+
}
|
19
|
+
}
|
20
|
+
}
|
21
|
+
]
|
22
|
+
}
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
data/examples/simple.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
$LOAD_PATH << './lib'
|
2
|
+
require './lib/peruse'
|
3
|
+
|
4
|
+
# Configure Peruse with a local Elasticsearch host and the name of the
# timestamp field.
Peruse.configure do |config|
  config.timestamp_field = :timestamp
  config.elasticsearch_options = { host: 'localhost' }
end

# Run a time-window query restricted to _type = syslog and print the result.
puts Peruse.search('window "last monday" to "last tuesday" & _type = syslog')
|
data/lib/peruse.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'elasticsearch'
|
2
|
+
|
3
|
+
require 'peruse/helper'
|
4
|
+
require 'peruse/utils'
|
5
|
+
require 'peruse/parser'
|
6
|
+
require 'peruse/transformer'
|
7
|
+
require 'peruse/result_set'
|
8
|
+
|
9
|
+
# Entry point of the gem. Configuration lives in module-level attributes;
# +search+ parses a query string, transforms it and either returns the
# resulting query or evaluates it through a ResultSet.
module Peruse
  class << self
    # elasticsearch_options:: arguments handed to Elasticsearch::Client.new
    #                         when no client has been assigned
    # elasticsearch_client::  client instance; built lazily by
    #                         initialize_elasticsearch unless already set
    # parser / transformer::  objects used by search; default to Parser.new
    #                         and Transformer.new
    # max_number_of_hits::    maximum number of hits to return (not read in
    #                         this module)
    # timestamp_field::       field name for time ranges; defaults to :timestamp
    # logger::                optional logger; receives debug output from search
    # parse_only::            when truthy, search returns the query instead of
    #                         evaluating it and no client is created
    attr_accessor :elasticsearch_options, :elasticsearch_client,
                  :parser, :transformer, :max_number_of_hits,
                  :timestamp_field, :logger, :parse_only
  end

  # Applies the configuration block (evaluated with class_eval, so the module
  # is both +self+ and the block argument), then fills in defaults and builds
  # the parser, transformer and, unless parse_only is set, the client.
  #
  # The block is optional; without one only the defaults are applied.
  def self.configure(&block)
    class_eval(&block) if block
    self.timestamp_field ||= :timestamp
    initialize_parser
    initialize_transformer
    initialize_elasticsearch unless parse_only
  end

  # Creates the Elasticsearch client unless one was assigned explicitly.
  # Falls back to an empty options hash so that a missing
  # elasticsearch_options does not pass nil to the client constructor.
  def self.initialize_elasticsearch
    self.elasticsearch_client ||=
      Elasticsearch::Client.new(elasticsearch_options || {})
  end

  # Creates the parser unless one was assigned explicitly.
  def self.initialize_parser
    self.parser ||= Parser.new
  end

  # Creates the transformer unless one was assigned explicitly.
  def self.initialize_transformer
    self.transformer ||= Transformer.new
  end

  # Parses +query_string+, applies the transformer and wraps the result in a
  # ResultSet.
  #
  # Returns ResultSet#query when parse_only is set, otherwise ResultSet#eval.
  # Requires configure to have been called first, since parser and
  # transformer are only created there.
  def self.search(query_string)
    transformed = transformer.apply(parser.parse(query_string))

    if logger
      logger.debug "Query String: #{query_string}"
      logger.debug "Parsed Output: #{transformed}"
    end

    result_set = ResultSet.new(transformed)
    parse_only ? result_set.query : result_set.eval
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'active_support/core_ext'
|
2
|
+
|
3
|
+
module Peruse
  # Stateless builders for the hash fragments that make up a query, plus
  # helpers for turning relative time expressions into timestamps.
  class Helper
    # Joins two subtrees under the boolean operator +op+.
    #
    # When +right+ already holds a list under +op+, +left+ is prepended to
    # that list so the result stays flat; otherwise both subtrees become the
    # two operands of a new +op+ node.
    def self.combine_subtrees(left, right, op)
      operands = right[op] ? [left] + right[op] : [left, right]
      { op => operands }
    end

    # Wraps +query_string+ in a query/query_string hash.
    def self.query_builder(query_string)
      { query: { query_string: { query: query_string } } }
    end

    # Wraps +filter+ in a query/filtered/filter hash.
    def self.filter_builder(filter)
      { query: { filtered: { filter: filter } } }
    end

    # Builds a limit hash with +limit+ as its value.
    def self.limit_builder(limit)
      { limit: { value: limit } }
    end

    # Builds a range hash on Peruse.timestamp_field bounded (inclusive, via
    # gte/lte) by +range_min+ and +range_max+.
    def self.range_builder(range_min, range_max)
      bounds = { gte: range_min, lte: range_max }
      { range: { Peruse.timestamp_field => bounds } }
    end

    # Builds a regexp hash for +field+. +flags+ defaults to 'ALL' when nil
    # or false.
    def self.regexp_builder(field, regexp, flags = nil)
      { regexp: { field => { value: regexp, flags: flags || 'ALL' } } }
    end

    # Builds an indices hash for the given +list+ of indices.
    def self.indices_builder(list)
      { indices: { indices: list } }
    end

    # Converts a quantity and a unit letter into a point in time that far in
    # the past, using the duration helpers on +int_quantity+
    # (seconds/minutes/hours/days/weeks followed by +ago+).
    #
    # quantifier:: 's', 'm', 'h', 'd' or 'w'
    #
    # Returns nil for any other quantifier.
    def self.time_query_to_timestamp(int_quantity, quantifier)
      case quantifier
      when 's' then int_quantity.seconds.ago
      when 'm' then int_quantity.minutes.ago
      when 'h' then int_quantity.hours.ago
      when 'd' then int_quantity.days.ago
      when 'w' then int_quantity.weeks.ago
      end
    end

    # Formats +time+ as an ISO 8601 string in UTC with millisecond precision.
    #
    # Time#utc converts a plain Time in place, which would silently change
    # the caller's object; getutc returns a converted copy instead. Objects
    # that do not offer getutc fall back to utc.
    def self.timestamp_format(time)
      utc_time = time.respond_to?(:getutc) ? time.getutc : time.utc
      utc_time.to_datetime.iso8601(3)
    end
  end
end
|