peruse 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 15b1475221dbde1e53dcce6955d47fc167274e62
4
+ data.tar.gz: d28ee6470fa23b902465727c41ea6aa642a3a5dd
5
+ SHA512:
6
+ metadata.gz: f1072f67995b3698514c4002d2ada8cf34c52ee16eae3331160a2ba0cd3eaf1f2eb1b51f3bedc78f0f96f0b816400eb37fc19e0db16e249d6b31d17859fee92d
7
+ data.tar.gz: df29c59ec1b7915342e57cebf3c40a87b92b86dd7700f46d527f86cfe4e15551dbaa45fa02c635825979a22c2f48b7b53a35197b657b7ba16cd8d1b3dab2a261
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1
data/AUTHORS ADDED
@@ -0,0 +1,2 @@
1
+ Jamil Bou Kheir <jamil@elbii.com>
2
+ Ram Mehta <ram.mehta@gmail.com>
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,63 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ plunk (0.3.11)
5
+ activesupport (~> 4.0, >= 4.0.0)
6
+ chronic (~> 0.10, >= 0.10.0)
7
+ elasticsearch (~> 1.0, >= 1.0.0)
8
+ json (~> 1.8, >= 1.8.0)
9
+ parslet (~> 1.5, >= 1.5.0)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ activesupport (4.1.4)
15
+ i18n (~> 0.6, >= 0.6.9)
16
+ json (~> 1.7, >= 1.7.7)
17
+ minitest (~> 5.1)
18
+ thread_safe (~> 0.1)
19
+ tzinfo (~> 1.1)
20
+ blankslate (2.1.2.4)
21
+ chronic (0.10.2)
22
+ diff-lcs (1.2.5)
23
+ elasticsearch (1.0.4)
24
+ elasticsearch-api (= 1.0.4)
25
+ elasticsearch-transport (= 1.0.4)
26
+ elasticsearch-api (1.0.4)
27
+ multi_json
28
+ elasticsearch-transport (1.0.4)
29
+ faraday
30
+ multi_json
31
+ faraday (0.9.0)
32
+ multipart-post (>= 1.2, < 3)
33
+ i18n (0.6.11)
34
+ json (1.8.1)
35
+ minitest (5.4.0)
36
+ multi_json (1.10.1)
37
+ multipart-post (2.0.0)
38
+ parslet (1.6.1)
39
+ blankslate (~> 2.0)
40
+ rspec (3.1.0)
41
+ rspec-core (~> 3.1.0)
42
+ rspec-expectations (~> 3.1.0)
43
+ rspec-mocks (~> 3.1.0)
44
+ rspec-core (3.1.3)
45
+ rspec-support (~> 3.1.0)
46
+ rspec-expectations (3.1.1)
47
+ diff-lcs (>= 1.2.0, < 2.0)
48
+ rspec-support (~> 3.1.0)
49
+ rspec-mocks (3.1.0)
50
+ rspec-support (~> 3.1.0)
51
+ rspec-support (3.1.0)
52
+ thread_safe (0.3.4)
53
+ timecop (0.7.1)
54
+ tzinfo (1.2.1)
55
+ thread_safe (~> 0.1)
56
+
57
+ PLATFORMS
58
+ ruby
59
+
60
+ DEPENDENCIES
61
+ plunk!
62
+ rspec (~> 3.1, >= 3.1.0)
63
+ timecop (~> 0.7, >= 0.7.1)
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Elbii
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,118 @@
1
+ Note: Plunk has been renamed to Peruse
2
+
3
+ Peruse
4
+ ======
5
+
6
+ Human-friendly query language for Elasticsearch
7
+
8
+ ## About
9
+
10
+ Peruse is a Ruby gem that takes a human-friendly, one-line search command and
11
+ translates it into full-fledged JSON to send to Elasticsearch. Currently it only
12
+ supports a few commands, but the goal is to support a large subset of what
13
+ Elasticsearch offers.
14
+
15
+ ## Installation
16
+ ```
17
+ gem install peruse
18
+ ```
19
+
20
+ Peruse uses [Parslet](https://github.com/kschiess/parslet) to first parse your
21
+ query, and then [Elasticsearch's official ruby library](https://github.com/elasticsearch/elasticsearch-ruby)
22
+ to send it to Elasticsearch.
23
+
24
+ ## Usage
25
+ ```ruby
26
+ require 'peruse'
27
+
28
+ #
29
+ # Configuration is required before using Peruse
30
+ #
31
+ # elasticsearch_options accepts the same params as Elasticsearch::Client
32
+ # from the elasticsearch-ruby library
33
+ Peruse.configure do |config|
34
+ config.elasticsearch_options = { host: 'localhost' }
35
+ end
36
+
37
+ # Restrict timeframe to last 1 week and match documents with _type=syslog
38
+ # s = seconds
39
+ # m = minutes
40
+ # h = hours
41
+ # d = days
42
+ # w = weeks
43
+ # All times in Peruse are converted to UTC
44
+ Peruse.search 'last 1w AND _type = syslog'
45
+
46
+ # The ```window``` command can also be used to filter by time
47
+ Peruse.search 'window -2d to -1d'
48
+
49
+ # Peruse tries to parse the date with Chronic, so this works too. Note the
50
+ # double quotes around the time string. This is needed if it contains a space.
51
+ Peruse.search 'window "last monday" to "last thursday"'
52
+
53
+ # Of course, absolute dates are supported as well. Date format is American style
54
+ # e.g. MM/DD/YY
55
+ Peruse.search 'window 3/14/12 to 3/15/12'
56
+
57
+ # Use double quotes to wrap space-containing strings
58
+ Peruse.search 'http.header = "UserAgent: Mozilla/5.0"'
59
+
60
+ # Commands are joined using parenthesized booleans
61
+ Peruse.search '(last 1h AND severity = 5) OR (last 1w AND severity = 3)'
62
+
63
+ # "AND" is aliased to "and" and "&". Similarly, "OR" is aliased to "or" and "|".
64
+ # The following queries are identical to the one above
65
+ Peruse.search '(last 1h and severity = 5) or (last 1w and severity = 3)'
66
+ Peruse.search '(last 1h & severity = 5) | (last 1w & severity = 3)'
67
+
68
+ # Use the NOT keyword to negate the following command or boolean chain
69
+ Peruse.search 'NOT message = Error'
70
+
71
+ # Like AND and OR, "NOT" is aliased to "not" and "~"
72
+ Peruse.search 'not message = Error'
73
+ Peruse.search '~ message = Error'
74
+
75
+ # Regexp is supported as well
76
+ Peruse.search 'http.headers = /.*User-Agent: Mozilla.*/ OR http.headers = /.*application\/json.*/'
77
+ ```
78
+
79
+
80
+ ## Translation
81
+
82
+ Under the hood, Peruse takes your query and translates it to
83
+ Elasticsearch-compatible JSON. For example,
84
+
85
+ ```last 24h & _type=syslog```
86
+
87
+ gets translated to:
88
+
89
+ ```json
90
+ {
91
+ "query": {
92
+ "filtered": {
93
+ "filter": {
94
+ "and": [
95
+ {
96
+ "range": {
97
+ "timestamp": {
98
+ "gte": "2013-08-23T05:43:13.770Z",
99
+ "lte": "2013-08-24T05:43:13.770Z"
100
+ }
101
+ }
102
+ },
103
+ {
104
+ "query": {
105
+ "query_string": {
106
+ "query": "_type:syslog"
107
+ }
108
+ }
109
+ }
110
+ ]
111
+ }
112
+ }
113
+ }
114
+ }
115
+ ```
116
+
117
+ In general, commands are combined into a single filter using Elasticsearch's
118
+ ```and```, ```or```, and ```not``` filters.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
# Rake tasks for the gem: registers an RSpec task named :spec and makes it
# the default task, so a bare `rake` runs :spec.
require 'rspec/core/rake_task'

RSpec::Core::RakeTask.new(:spec)

task default: :spec
data/bin/peruse ADDED
@@ -0,0 +1,75 @@
1
#!/usr/bin/env ruby
# Command-line front end for Peruse.
#
# Parses the command-line flags below, configures Peruse accordingly, reads a
# query from standard input, and prints the result of Peruse.search as JSON.
$LOAD_PATH << './lib'
require 'peruse'
require 'optparse'
require 'logger' # Logger is used for --debug; do not rely on a dependency loading it.

options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: peruse [options]"
  opts.separator ""
  opts.separator "Options"

  # NOTE(review): "-h" is bound to --host here, which takes the short flag
  # OptionParser normally offers for help -- confirm this is intended.
  opts.on(
    "-h",
    "--host HOST",
    "comma-separated list of Elasticsearch hosts to use"
  ) do |option|
    options[:host] = option
  end

  opts.on(
    "-p",
    "--parse-only",
    "parse but don't execute query, returning ES-compatible JSON"
  ) do |option|
    options[:parse_only] = option
  end

  opts.on(
    "-s",
    "--size SIZE",
    "max number of hits to return"
  ) do |option|
    options[:size] = option
  end

  opts.on(
    "-r",
    "--randomize-hosts",
    "randomize hosts used for each search"
  ) do |option|
    options[:randomize_hosts] = option
  end

  opts.on(
    "-t",
    "--timestamp-field FIELD",
    "timestamp field to use for timerange searches"
  ) do |option|
    options[:timestamp_field] = option
  end

  opts.on(
    "-d",
    "--debug",
    "turn on debugging output"
  ) do |option|
    options[:debug] = option
  end
end.parse!

# A host list is only needed when the query is actually executed. Fail with a
# readable message instead of a NoMethodError on nil further down.
if options[:host].nil? && !options[:parse_only]
  abort "peruse: --host is required unless --parse-only is given"
end

Peruse.configure do |c|
  c.parse_only = options[:parse_only]
  c.max_number_of_hits = options[:size].to_i if options[:size]
  # The guard previously read options[:timstamp_field] (typo), so the
  # --timestamp-field flag was never applied.
  c.timestamp_field = options[:timestamp_field].strip if options[:timestamp_field]
  c.logger = Logger.new(STDOUT) if options[:debug]

  unless c.parse_only
    c.elasticsearch_client = Elasticsearch::Client.new(
      host: options[:host].split(',').map(&:strip),
      randomize_hosts: options[:randomize_hosts]
    )
  end
end

puts Peruse.search($stdin.read).to_json
@@ -0,0 +1,25 @@
1
+ {
2
+ "query":{
3
+ "filtered":{
4
+ "filter":{
5
+ "and":[
6
+ {
7
+ "range":{
8
+ "timestamp":{
9
+ "gte":"2014-04-01T16:00:00.000+00:00",
10
+ "lte":"2014-04-07T16:00:00.000+00:00"
11
+ }
12
+ }
13
+ },
14
+ {
15
+ "query":{
16
+ "query_string":{
17
+ "query":"_type:syslog"
18
+ }
19
+ }
20
+ }
21
+ ]
22
+ }
23
+ }
24
+ }
25
+ }
@@ -0,0 +1,10 @@
1
# Example: configure Peruse against a local Elasticsearch host and print the
# result of a time-window search restricted to _type = syslog.
$LOAD_PATH << './lib'
require './lib/peruse'

Peruse.configure do |config|
  config.timestamp_field = :timestamp
  config.elasticsearch_options = { host: 'localhost' }
end

puts Peruse.search('window "last monday" to "last tuesday" & _type = syslog')
data/lib/peruse.rb ADDED
@@ -0,0 +1,53 @@
1
+ require 'elasticsearch'
2
+
3
+ require 'peruse/helper'
4
+ require 'peruse/utils'
5
+ require 'peruse/parser'
6
+ require 'peruse/transformer'
7
+ require 'peruse/result_set'
8
+
9
# Top-level namespace and entry point of the gem.
#
# Holds the module-level configuration (Elasticsearch client/options, parser,
# transformer, hit limit, timestamp field, logger, parse-only flag) and exposes
# Peruse.search, which parses a query string, transforms it and either returns
# the resulting query or evaluates it through a ResultSet.
module Peruse
  class << self
    attr_accessor :elasticsearch_options, :elasticsearch_client,
      :parser, :transformer, :max_number_of_hits, :timestamp_field, :logger,
      :parse_only
  end

  # Applies user configuration and fills in defaults.
  #
  # The block, if given, is run with class_eval, so it is evaluated in the
  # context of this module and also receives the module as its argument.
  # Calling configure without a block is allowed and only applies defaults.
  # The Elasticsearch client is created only when parse_only is not set.
  def self.configure(&block)
    class_eval(&block) if block
    self.timestamp_field ||= :timestamp
    initialize_parser
    initialize_transformer
    initialize_elasticsearch unless parse_only
  end

  # Builds the Elasticsearch client unless one was already assigned.
  # Falls back to an empty options hash when elasticsearch_options is unset,
  # so the client constructor is never handed nil.
  def self.initialize_elasticsearch
    self.elasticsearch_client ||= Elasticsearch::Client.new(elasticsearch_options || {})
  end

  # Creates the parser unless one was already assigned.
  def self.initialize_parser
    self.parser ||= Parser.new
  end

  # Creates the transformer unless one was already assigned.
  def self.initialize_transformer
    self.transformer ||= Transformer.new
  end

  # Parses and transforms +query_string+, then wraps the result in a ResultSet.
  #
  # Returns ResultSet#query when parse_only is set, ResultSet#eval otherwise.
  # When a logger is configured, the raw query string and the transformed
  # output are logged at debug level.
  def self.search(query_string)
    parsed = parser.parse(query_string)
    transformed = transformer.apply(parsed)

    if logger
      logger.debug "Query String: #{query_string}"
      logger.debug "Parsed Output: #{transformed}"
    end

    result_set = ResultSet.new(transformed)
    parse_only ? result_set.query : result_set.eval
  end
end
@@ -0,0 +1,90 @@
1
+ require 'active_support/core_ext'
2
+
3
module Peruse
  # Stateless builders for the hash fragments that make up an Elasticsearch
  # request body, plus small time helpers used for time-range queries.
  class Helper
    # Joins two subtrees under the boolean key +op+.
    #
    # When +right+ already has an +op+ entry, +left+ is prepended to that list
    # so chains of the same operator stay flat; otherwise a new two-element
    # list is created.
    def self.combine_subtrees(left, right, op)
      if right[op]
        { op => [left] + right[op] }
      else
        { op => [left, right] }
      end
    end

    # Wraps +query_string+ in a query/query_string hash.
    def self.query_builder(query_string)
      { query: { query_string: { query: query_string } } }
    end

    # Wraps +filter+ in a query/filtered/filter hash.
    def self.filter_builder(filter)
      { query: { filtered: { filter: filter } } }
    end

    # Builds a limit hash carrying +limit+ as its value.
    def self.limit_builder(limit)
      { limit: { value: limit } }
    end

    # Builds an inclusive (gte/lte) range hash keyed by Peruse.timestamp_field.
    def self.range_builder(range_min, range_max)
      {
        range: {
          Peruse.timestamp_field => {
            gte: range_min,
            lte: range_max
          }
        }
      }
    end

    # Builds a regexp hash for +field+. +flags+ defaults to 'ALL' when nil.
    def self.regexp_builder(field, regexp, flags=nil)
      {
        regexp: {
          field => {
            value: regexp,
            flags: flags || 'ALL'
          }
        }
      }
    end

    # Builds an indices hash listing +list+.
    def self.indices_builder(list)
      { indices: { indices: list } }
    end

    # Converts a quantity and a unit suffix into the time that far in the past.
    #
    # +quantifier+ is one of 's', 'm', 'h', 'd', 'w'. Any other value yields
    # nil. Relies on the duration helpers (seconds/minutes/... and #ago) being
    # available on +int_quantity+.
    def self.time_query_to_timestamp(int_quantity, quantifier)
      case quantifier
      when 's' then int_quantity.seconds.ago
      when 'm' then int_quantity.minutes.ago
      when 'h' then int_quantity.hours.ago
      when 'd' then int_quantity.days.ago
      when 'w' then int_quantity.weeks.ago
      end
    end

    # Formats +time+ as an ISO 8601 string in UTC with millisecond precision.
    #
    # Time#utc converts its receiver in place, so the conversion is done on a
    # copy to leave the caller's object untouched.
    def self.timestamp_format(time)
      time.dup.utc.to_datetime.iso8601(3)
    end
  end
end