elastic-stats 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +29 -3
- data/lib/elastic/stats/elastic_client.rb +9 -0
- data/lib/elastic/stats/ks.rb +16 -13
- data/lib/elastic/stats/naive-bayes.rb +9 -0
- data/lib/elastic/stats/naive-bayes/predictor.rb +43 -0
- data/lib/elastic/stats/naive-bayes/set.rb +103 -0
- data/lib/elastic/stats/naive-bayes/token_stats.rb +64 -0
- data/lib/elastic/stats/version.rb +1 -1
- data/spec/elastic/stats/elastic_client_spec.rb +46 -29
- data/spec/elastic/stats/ks_spec.rb +26 -3
- data/spec/elastic/stats/naive-bayes/predictor_spec.rb +12 -0
- data/spec/elastic/stats/naive-bayes/set_spec.rb +14 -0
- data/spec/elastic/stats/naive-bayes/token_stats_spec.rb +66 -0
- data/spec/spec_helper.rb +2 -1
- metadata +41 -55
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 14b0b8911410832cfbb96d6d872be82f87a52091
|
4
|
+
data.tar.gz: 247ff87c96e46487e2078e509122efc518bbf760
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ccbbe4e10c740ece220fd8942b1cac41c3d8ce0e86ce6f34ab569d59a3671c5f5f0a5060232d32006cdc391dac0388f56e952c2e65efe4484fdf73a76dcfa8b5
|
7
|
+
data.tar.gz: e63a3e56d5b01c4c8b2ba10aee7ae674386677437f795b9e6689bcc4913f40b4d75514e71ae53b5c0998ef2585aa1cd4edb6f33bf120a12ffd6dac4d70c22826
|
data/README.md
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# Elastic::Stats
|
2
2
|
|
3
|
-
TODO: Write a gem description
|
4
|
-
|
5
3
|
## Installation
|
6
4
|
|
7
5
|
Add this line to your application's Gemfile:
|
@@ -18,7 +16,35 @@ Or install it yourself as:
|
|
18
16
|
|
19
17
|
## Usage
|
20
18
|
|
21
|
-
|
19
|
+
### KS
|
20
|
+
|
21
|
+
~~~
|
22
|
+
require 'elastic/stats/ks'
|
23
|
+
require 'logger'
|
24
|
+
|
25
|
+
# Set the URL
|
26
|
+
ENV['ELASTICSEARCH_URL'] = 'https://test.eagerelk.com/'
|
27
|
+
# This is perfect for logstash stats
|
28
|
+
stats = Elastic::Stats::KS.new 'logstash-2015.03.05'
|
29
|
+
# Set some client options to enable logging and debugging
|
30
|
+
stats.client_options = {
|
31
|
+
debug: true,
|
32
|
+
logger: Logger.new(STDOUT),
|
33
|
+
request_body: true,
|
34
|
+
transport_options: {
|
35
|
+
ssl: { verify: false }
|
36
|
+
}
|
37
|
+
}
|
38
|
+
# Add extra filters to the query
|
39
|
+
stats.query = { filtered: { filter: { fquery: { query: { query_string:{ query: "type:(\"crimson_db\")"}}}}}}
|
40
|
+
|
41
|
+
# Fetch and output the stats
|
42
|
+
puts stats.fetch.inspect
|
43
|
+
~~~
|
44
|
+
|
45
|
+
### Naive Bayes Filter
|
46
|
+
|
47
|
+
TODO
|
22
48
|
|
23
49
|
## Contributing
|
24
50
|
|
@@ -5,6 +5,7 @@ module Elastic
|
|
5
5
|
# Module to set up and manage the Elasticsearch client
|
6
6
|
module ElasticClient
|
7
7
|
attr_writer :client
|
8
|
+
attr_accessor :index, :type
|
8
9
|
|
9
10
|
def client
|
10
11
|
@client ||= Elasticsearch::Client.new client_options
|
@@ -18,6 +19,14 @@ module Elastic
|
|
18
19
|
client_options.update(options)
|
19
20
|
end
|
20
21
|
|
22
|
+
def search(options = {})
|
23
|
+
client.search({ index: index, type: type }.merge(options))
|
24
|
+
end
|
25
|
+
|
26
|
+
def analyze(options = {})
|
27
|
+
client.indices.analyze({ index: index, type: type }.merge(options))
|
28
|
+
end
|
29
|
+
|
21
30
|
private
|
22
31
|
|
23
32
|
def default_options
|
data/lib/elastic/stats/ks.rb
CHANGED
@@ -9,8 +9,7 @@ module Elastic
|
|
9
9
|
class KS
|
10
10
|
include ElasticClient
|
11
11
|
|
12
|
-
attr_accessor :logger
|
13
|
-
attr_writer :debug, :query
|
12
|
+
attr_accessor :logger, :query
|
14
13
|
attr_reader :indices, :to, :from, :span, :interval, :field
|
15
14
|
|
16
15
|
MULTIPLIERS = {
|
@@ -34,8 +33,7 @@ module Elastic
|
|
34
33
|
@field = options.delete(:field)
|
35
34
|
@offset = options.delete(:offset)
|
36
35
|
|
37
|
-
@indices = [indices]
|
38
|
-
@to = @to.to_i if @to.respond_to?(:to_i)
|
36
|
+
@indices = [indices] unless @indices.is_a? Array
|
39
37
|
@from = @to - @span
|
40
38
|
end
|
41
39
|
|
@@ -63,26 +61,31 @@ module Elastic
|
|
63
61
|
)
|
64
62
|
end
|
65
63
|
|
64
|
+
private
|
65
|
+
|
66
66
|
def range(from, to)
|
67
67
|
Hashie::Mash.new(
|
68
|
-
client.search index: indices.join(','), body:
|
68
|
+
client.search index: indices.join(','), body: body(from, to)
|
69
69
|
).aggregations.hits_per_minute.buckets.collect(&:doc_count)
|
70
70
|
end
|
71
71
|
|
72
|
-
def
|
73
|
-
|
74
|
-
|
72
|
+
def body(from, to)
|
73
|
+
body = Hashie::Mash.new
|
74
|
+
body.query = query if query
|
75
|
+
body.aggregations!.hits_per_minute!.date_histogram = aggregate(from, to)
|
76
|
+
body
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def aggregate(from, to)
|
82
|
+
{
|
75
83
|
field: field, interval: interval, min_doc_count: 0,
|
76
84
|
extended_bounds: {
|
77
85
|
min: (from * 1000),
|
78
86
|
max: (to * 1000)
|
79
87
|
}
|
80
88
|
}
|
81
|
-
@query
|
82
|
-
end
|
83
|
-
|
84
|
-
def debug?
|
85
|
-
@debug ||= ENV['DEBUG']
|
86
89
|
end
|
87
90
|
|
88
91
|
private
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'elastic/stats/elastic_client'
|
2
|
+
require 'elastic/stats/naive-bayes/token_stats'
|
3
|
+
|
4
|
+
module Elastic
|
5
|
+
module Stats
|
6
|
+
module NaiveBayes
|
7
|
+
# Utility to perform Naive Bayes category predictions on text
|
8
|
+
class Predictor
|
9
|
+
include ElasticClient
|
10
|
+
|
11
|
+
attr_reader :prior_set
|
12
|
+
attr_writer :adjust
|
13
|
+
|
14
|
+
def initialize(prior_set)
|
15
|
+
@prior_set = prior_set
|
16
|
+
end
|
17
|
+
|
18
|
+
def guess(subject)
|
19
|
+
scores = {}
|
20
|
+
prior_set.categories.keys.each do |category|
|
21
|
+
scores[category] = score(subject, category)
|
22
|
+
end
|
23
|
+
Hash[scores.sort_by { |label, score| -score }]
|
24
|
+
end
|
25
|
+
|
26
|
+
def score(subject, category)
|
27
|
+
# Calculate the probability for each token in this category
|
28
|
+
log_sum = tokenize(subject).reduce(0) do |sum, token|
|
29
|
+
stats = TokenStats.new(token, prior_set)
|
30
|
+
sum + stats.bayes(category)
|
31
|
+
end
|
32
|
+
|
33
|
+
1 / (1 + Math.exp(log_sum))
|
34
|
+
end
|
35
|
+
|
36
|
+
def tokenize(subject)
|
37
|
+
@tokenize ||= Hash.new { |h, k| h[k] = prior_set.tokenize k }
|
38
|
+
@tokenize[subject]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
require 'elastic/stats/elastic_client'
|
3
|
+
|
4
|
+
module Elastic
|
5
|
+
module Stats
|
6
|
+
module NaiveBayes
|
7
|
+
# A set of documents against which statistics will be calculated
|
8
|
+
class Set
|
9
|
+
include ElasticClient
|
10
|
+
|
11
|
+
attr_reader :category_field, :subject_field, :index, :type
|
12
|
+
|
13
|
+
def initialize(index, type, category_field, subject_field)
|
14
|
+
@index = index
|
15
|
+
@type = type
|
16
|
+
@category_field = category_field
|
17
|
+
@subject_field = subject_field
|
18
|
+
end
|
19
|
+
|
20
|
+
def count
|
21
|
+
init_stats if @count.nil?
|
22
|
+
@count
|
23
|
+
end
|
24
|
+
|
25
|
+
def categories
|
26
|
+
init_stats if @categories.nil?
|
27
|
+
@categories
|
28
|
+
end
|
29
|
+
|
30
|
+
def tokens
|
31
|
+
@tokens ||= Hash.new { |h, k| h[k] = count_search[k]['hits']['total'] }
|
32
|
+
end
|
33
|
+
|
34
|
+
def token_categories
|
35
|
+
@token_categories ||= Hash.new do |h, k|
|
36
|
+
result = count_search[k]['aggregations']['counts']['buckets'].map do |bucket|
|
37
|
+
{ bucket['key'] => bucket['doc_count'] }
|
38
|
+
end
|
39
|
+
h[k] = Hash.new(0).merge(result.reduce(:merge))
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def tokenize(subject)
|
44
|
+
results = analyze field: subject_field, text: subject
|
45
|
+
results['tokens'].collect { |x| x['token'] }
|
46
|
+
end
|
47
|
+
|
48
|
+
# Elasticsearch client helper methods
|
49
|
+
def search(options = {})
|
50
|
+
client.search({ index: index, type: type }.merge(options))
|
51
|
+
end
|
52
|
+
|
53
|
+
def analyze(options = {})
|
54
|
+
client.indices.analyze({ index: index }.merge(options))
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def init_stats
|
60
|
+
results = Hashie::Mash.new(
|
61
|
+
search(search_type: 'count', body: aggregation)
|
62
|
+
)
|
63
|
+
|
64
|
+
@count = results.hits.total
|
65
|
+
@categories = results.aggregations.counts.buckets.map do |bucket|
|
66
|
+
{ bucket['key'] => bucket['doc_count'] }
|
67
|
+
end
|
68
|
+
@categories = @categories.reduce(:merge)
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def count_search
|
74
|
+
@count_search ||= Hash.new{ |h, k| h[k] = search search_type: 'count', body: token_query(k) }
|
75
|
+
end
|
76
|
+
|
77
|
+
private
|
78
|
+
|
79
|
+
def aggregation
|
80
|
+
{
|
81
|
+
aggs: {
|
82
|
+
counts: {
|
83
|
+
terms: {
|
84
|
+
field: category_field,
|
85
|
+
size: 200 # We're assuming there's less than 200 categories
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
def token_query(token)
|
95
|
+
body = Hashie::Mash.new
|
96
|
+
body.query!.filtered!.filter!.term!
|
97
|
+
body.query.filtered.filter.term[subject_field] = token
|
98
|
+
body.merge aggregation
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module Elastic
|
2
|
+
module Stats
|
3
|
+
module NaiveBayes
|
4
|
+
# Provide statistics about a token in a specific set of data
|
5
|
+
class TokenStats
|
6
|
+
attr_reader :token, :set
|
7
|
+
|
8
|
+
def initialize(token, set)
|
9
|
+
@token = token
|
10
|
+
@set = set
|
11
|
+
end
|
12
|
+
|
13
|
+
# Returns the number of documents that contain the token
|
14
|
+
def count
|
15
|
+
set.tokens[token]
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns the categories associated with the token in the set as a Hash
|
19
|
+
def categories
|
20
|
+
set.token_categories[token]
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns the probability that a token is in the specified category
|
24
|
+
def probability(category)
|
25
|
+
return 0 unless categories.has_key? category
|
26
|
+
return 0 if set.categories[category] == 0
|
27
|
+
categories[category] / set.categories[category].to_f
|
28
|
+
end
|
29
|
+
|
30
|
+
# Returns the inverse probability that a token is in the category
|
31
|
+
def inverse(category)
|
32
|
+
return 0 unless categories.has_key? category
|
33
|
+
return 0 if (set.count - set.categories[category]) == 0
|
34
|
+
(count - categories[category]) / \
|
35
|
+
(set.count - set.categories[category]).to_f
|
36
|
+
end
|
37
|
+
|
38
|
+
def bayes(category)
|
39
|
+
return 0 if count == 0
|
40
|
+
return 0 if (probability(category) + inverse(category)) == 0
|
41
|
+
calculated = log_protect(
|
42
|
+
probability(category) / (probability(category) + inverse(category))
|
43
|
+
)
|
44
|
+
adjust(calculated)
|
45
|
+
Math.log(1 - calculated) - Math.log(calculated)
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def adjust(probability, weight = 1, target = 0.5)
|
51
|
+
((weight * target) + (count * probability)) / (1 + count)
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def log_protect(probability)
|
57
|
+
return 0.0001 if probability == 0
|
58
|
+
return 0.9999 if probability == 1
|
59
|
+
probability
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -8,44 +8,61 @@ end
|
|
8
8
|
|
9
9
|
describe Elastic::Stats::ElasticClient do
|
10
10
|
subject { ElasticClientTest.new }
|
11
|
-
it 'allows the client to be set' do
|
12
|
-
client = Object.new
|
13
|
-
subject.client = client
|
14
11
|
|
15
|
-
|
16
|
-
|
12
|
+
context '#client' do
|
13
|
+
it 'allows the client to be set' do
|
14
|
+
client = Object.new
|
15
|
+
subject.client = client
|
17
16
|
|
18
|
-
|
19
|
-
|
20
|
-
url: ENV['ELASTICSEARCH_URL']
|
21
|
-
)
|
17
|
+
expect(subject.client).to be client
|
18
|
+
end
|
22
19
|
end
|
23
20
|
|
24
|
-
|
25
|
-
|
21
|
+
context '#client_options' do
|
22
|
+
it 'has sane default options' do
|
23
|
+
expect(subject.client_options).to eq(
|
24
|
+
url: ENV['ELASTICSEARCH_URL']
|
25
|
+
)
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'allows the client options to be retrieved' do
|
29
|
+
expect(subject.client_options).to eq(url: ENV['ELASTICSEARCH_URL'])
|
30
|
+
end
|
26
31
|
end
|
27
32
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
33
|
+
context '#client_options=' do
|
34
|
+
it 'allows the default client options to be added' do
|
35
|
+
logger = Object.new
|
36
|
+
options = {
|
37
|
+
debug: true,
|
38
|
+
logger: logger
|
39
|
+
}
|
40
|
+
subject.client_options = options
|
41
|
+
|
42
|
+
options.update(url: ENV['ELASTICSEARCH_URL'])
|
43
|
+
expect(subject.client_options).to eq options
|
44
|
+
end
|
35
45
|
|
36
|
-
options
|
37
|
-
|
46
|
+
it 'allows the default client options to be overriden' do
|
47
|
+
logger = Object.new
|
48
|
+
options = {
|
49
|
+
debug: true,
|
50
|
+
logger: logger,
|
51
|
+
url: 'http://mytesturl.com:9200/'
|
52
|
+
}
|
53
|
+
subject.client_options = options
|
54
|
+
|
55
|
+
expect(subject.client_options).to eq options
|
56
|
+
end
|
38
57
|
end
|
39
58
|
|
40
|
-
|
41
|
-
|
42
|
-
options
|
43
|
-
|
44
|
-
logger: logger,
|
45
|
-
url: 'http://mytesturl.com:9200/'
|
46
|
-
}
|
47
|
-
subject.client_options = options
|
59
|
+
context '#search' do
|
60
|
+
it 'sends a search request using the specified type and index'
|
61
|
+
it 'sends a search request using the passed in options'
|
62
|
+
end
|
48
63
|
|
49
|
-
|
64
|
+
context '#analyze' do
|
65
|
+
it 'sends an analyze request using the specified type and index'
|
66
|
+
it 'sends an analyze request using the passed in options'
|
50
67
|
end
|
51
68
|
end
|
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'elastic/stats/ks'
|
3
|
-
require 'webmock'
|
4
3
|
|
5
4
|
describe Elastic::Stats::KS do
|
6
5
|
it 'has an array of indices' do
|
@@ -26,13 +25,21 @@ describe Elastic::Stats::KS do
|
|
26
25
|
field: '@mytimefield'
|
27
26
|
}
|
28
27
|
ks = Elastic::Stats::KS.new('logstash-2015.12.12', options)
|
29
|
-
expect(ks.to).to eq now
|
30
|
-
expect(ks.from).to eq(ks.to
|
28
|
+
expect(ks.to).to eq now
|
29
|
+
expect(ks.from).to eq(ks.to - (60 * 60 * 24))
|
31
30
|
expect(ks.interval).to eq '5m'
|
32
31
|
expect(ks.field).to eq '@mytimefield'
|
33
32
|
end
|
34
33
|
|
35
34
|
context 'fetch' do
|
35
|
+
before(:all) do
|
36
|
+
WebMock.enable!
|
37
|
+
end
|
38
|
+
|
39
|
+
after(:all) do
|
40
|
+
WebMock.disable!
|
41
|
+
end
|
42
|
+
|
36
43
|
subject do
|
37
44
|
WebMock.stub_request(:get, 'http://localhost:9200/fake/_search')
|
38
45
|
.to_return(
|
@@ -68,4 +75,20 @@ describe Elastic::Stats::KS do
|
|
68
75
|
expect(subject[:comparison]).to eq 0.9616652224137048
|
69
76
|
end
|
70
77
|
end
|
78
|
+
|
79
|
+
context 'query' do
|
80
|
+
it 'has no default query' do
|
81
|
+
ks = Elastic::Stats::KS.new('logstash-2015.12.12')
|
82
|
+
|
83
|
+
expect(ks.query).to be_nil
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'allows for the query to be set' do
|
87
|
+
ks = Elastic::Stats::KS.new('logstash-2015.12.12')
|
88
|
+
query = { 'term' => { 'user' => 'eagerelk' } }
|
89
|
+
ks.query = query
|
90
|
+
|
91
|
+
expect(ks.query).to eq query
|
92
|
+
end
|
93
|
+
end
|
71
94
|
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'elastic/stats/naive-bayes/set'
|
3
|
+
|
4
|
+
describe Elastic::Stats::NaiveBayes::Set do
|
5
|
+
subject do
|
6
|
+
Elastic::Stats::NaiveBayes::Set.new('transactions', 'training', 'category')
|
7
|
+
end
|
8
|
+
|
9
|
+
context '#tokens' do
|
10
|
+
it 'works'
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'elastic/stats/naive-bayes/set'
|
3
|
+
|
4
|
+
describe Elastic::Stats::NaiveBayes::Set do
|
5
|
+
subject do
|
6
|
+
Elastic::Stats::NaiveBayes::Set.new(
|
7
|
+
'transactions', 'training', 'category', 'subject'
|
8
|
+
)
|
9
|
+
end
|
10
|
+
|
11
|
+
context '#tokens' do
|
12
|
+
it 'works'
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'elastic/stats/naive-bayes/token_stats'
|
3
|
+
|
4
|
+
# Testing Set
|
5
|
+
class TestSet
|
6
|
+
def count
|
7
|
+
10
|
8
|
+
end
|
9
|
+
|
10
|
+
def categories
|
11
|
+
{
|
12
|
+
afrikaans: 4,
|
13
|
+
english: 5,
|
14
|
+
french: 1
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
def tokens
|
19
|
+
{
|
20
|
+
'is' => 7
|
21
|
+
}
|
22
|
+
end
|
23
|
+
|
24
|
+
def token_categories
|
25
|
+
{
|
26
|
+
'is' => {
|
27
|
+
afrikaans: 4,
|
28
|
+
english: 4
|
29
|
+
}
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
def tokenize(subject)
|
34
|
+
subject.downcase.split
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def subject
|
39
|
+
@subject ||= Elastic::Stats::NaiveBayes::TokenStats.new 'is', TestSet.new
|
40
|
+
end
|
41
|
+
|
42
|
+
describe Elastic::Stats::NaiveBayes::TokenStats do
|
43
|
+
subject do
|
44
|
+
TokenStats.new 'is', TestSet.new
|
45
|
+
end
|
46
|
+
|
47
|
+
context '#count' do
|
48
|
+
it 'returns the number of documents that contains the token'
|
49
|
+
it 'returns the categories as an integer'
|
50
|
+
end
|
51
|
+
|
52
|
+
context '#categories' do
|
53
|
+
it 'returns the categories associated with the token in the set'
|
54
|
+
it 'returns the categories as a Hash'
|
55
|
+
end
|
56
|
+
|
57
|
+
context '#probability' do
|
58
|
+
it 'returns the probability that a token is in the specified category'
|
59
|
+
it 'returns the probability as a float'
|
60
|
+
end
|
61
|
+
|
62
|
+
context '#inverse' do
|
63
|
+
it 'returns the inverse probability that a token is in the category'
|
64
|
+
it 'returns the probability as a float'
|
65
|
+
end
|
66
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,190 +1,167 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elastic-stats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.2
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jugrens du Toit
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2015-
|
11
|
+
date: 2015-05-27 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.6'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '1.6'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rspec
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - ">="
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - ">="
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: guard
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - ">="
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - ">="
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: guard-rspec
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- -
|
59
|
+
- - ">="
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: '0'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- -
|
66
|
+
- - ">="
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0'
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
70
|
name: rubocop
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
|
-
- -
|
73
|
+
- - ">="
|
84
74
|
- !ruby/object:Gem::Version
|
85
75
|
version: '0'
|
86
76
|
type: :development
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
|
-
- -
|
80
|
+
- - ">="
|
92
81
|
- !ruby/object:Gem::Version
|
93
82
|
version: '0'
|
94
83
|
- !ruby/object:Gem::Dependency
|
95
84
|
name: rake
|
96
85
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
86
|
requirements:
|
99
|
-
- -
|
87
|
+
- - ">="
|
100
88
|
- !ruby/object:Gem::Version
|
101
89
|
version: '0'
|
102
90
|
type: :development
|
103
91
|
prerelease: false
|
104
92
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
93
|
requirements:
|
107
|
-
- -
|
94
|
+
- - ">="
|
108
95
|
- !ruby/object:Gem::Version
|
109
96
|
version: '0'
|
110
97
|
- !ruby/object:Gem::Dependency
|
111
98
|
name: webmock
|
112
99
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
100
|
requirements:
|
115
|
-
- -
|
101
|
+
- - ">="
|
116
102
|
- !ruby/object:Gem::Version
|
117
103
|
version: '0'
|
118
104
|
type: :development
|
119
105
|
prerelease: false
|
120
106
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
107
|
requirements:
|
123
|
-
- -
|
108
|
+
- - ">="
|
124
109
|
- !ruby/object:Gem::Version
|
125
110
|
version: '0'
|
126
111
|
- !ruby/object:Gem::Dependency
|
127
112
|
name: codeclimate-test-reporter
|
128
113
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
114
|
requirements:
|
131
|
-
- -
|
115
|
+
- - ">="
|
132
116
|
- !ruby/object:Gem::Version
|
133
117
|
version: '0'
|
134
118
|
type: :development
|
135
119
|
prerelease: false
|
136
120
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
121
|
requirements:
|
139
|
-
- -
|
122
|
+
- - ">="
|
140
123
|
- !ruby/object:Gem::Version
|
141
124
|
version: '0'
|
142
125
|
- !ruby/object:Gem::Dependency
|
143
126
|
name: hashie
|
144
127
|
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
128
|
requirements:
|
147
|
-
- -
|
129
|
+
- - ">="
|
148
130
|
- !ruby/object:Gem::Version
|
149
131
|
version: '0'
|
150
132
|
type: :runtime
|
151
133
|
prerelease: false
|
152
134
|
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
135
|
requirements:
|
155
|
-
- -
|
136
|
+
- - ">="
|
156
137
|
- !ruby/object:Gem::Version
|
157
138
|
version: '0'
|
158
139
|
- !ruby/object:Gem::Dependency
|
159
140
|
name: statsample
|
160
141
|
requirement: !ruby/object:Gem::Requirement
|
161
|
-
none: false
|
162
142
|
requirements:
|
163
|
-
- -
|
143
|
+
- - ">="
|
164
144
|
- !ruby/object:Gem::Version
|
165
145
|
version: '0'
|
166
146
|
type: :runtime
|
167
147
|
prerelease: false
|
168
148
|
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
149
|
requirements:
|
171
|
-
- -
|
150
|
+
- - ">="
|
172
151
|
- !ruby/object:Gem::Version
|
173
152
|
version: '0'
|
174
153
|
- !ruby/object:Gem::Dependency
|
175
154
|
name: elasticsearch
|
176
155
|
requirement: !ruby/object:Gem::Requirement
|
177
|
-
none: false
|
178
156
|
requirements:
|
179
|
-
- -
|
157
|
+
- - ">="
|
180
158
|
- !ruby/object:Gem::Version
|
181
159
|
version: '0'
|
182
160
|
type: :runtime
|
183
161
|
prerelease: false
|
184
162
|
version_requirements: !ruby/object:Gem::Requirement
|
185
|
-
none: false
|
186
163
|
requirements:
|
187
|
-
- -
|
164
|
+
- - ">="
|
188
165
|
- !ruby/object:Gem::Version
|
189
166
|
version: '0'
|
190
167
|
description:
|
@@ -194,8 +171,8 @@ executables: []
|
|
194
171
|
extensions: []
|
195
172
|
extra_rdoc_files: []
|
196
173
|
files:
|
197
|
-
- .gitignore
|
198
|
-
- .ruby-version
|
174
|
+
- ".gitignore"
|
175
|
+
- ".ruby-version"
|
199
176
|
- Gemfile
|
200
177
|
- Guardfile
|
201
178
|
- LICENSE
|
@@ -206,9 +183,16 @@ files:
|
|
206
183
|
- lib/elastic/stats.rb
|
207
184
|
- lib/elastic/stats/elastic_client.rb
|
208
185
|
- lib/elastic/stats/ks.rb
|
186
|
+
- lib/elastic/stats/naive-bayes.rb
|
187
|
+
- lib/elastic/stats/naive-bayes/predictor.rb
|
188
|
+
- lib/elastic/stats/naive-bayes/set.rb
|
189
|
+
- lib/elastic/stats/naive-bayes/token_stats.rb
|
209
190
|
- lib/elastic/stats/version.rb
|
210
191
|
- spec/elastic/stats/elastic_client_spec.rb
|
211
192
|
- spec/elastic/stats/ks_spec.rb
|
193
|
+
- spec/elastic/stats/naive-bayes/predictor_spec.rb
|
194
|
+
- spec/elastic/stats/naive-bayes/set_spec.rb
|
195
|
+
- spec/elastic/stats/naive-bayes/token_stats_spec.rb
|
212
196
|
- spec/fixtures/basic_search_request.json
|
213
197
|
- spec/fixtures/successful_search.json
|
214
198
|
- spec/helpers/utility.rb
|
@@ -216,31 +200,33 @@ files:
|
|
216
200
|
homepage: https://github.com/eagerelk/elastic-stats-ruby
|
217
201
|
licenses:
|
218
202
|
- MIT
|
203
|
+
metadata: {}
|
219
204
|
post_install_message:
|
220
205
|
rdoc_options: []
|
221
206
|
require_paths:
|
222
207
|
- lib
|
223
208
|
required_ruby_version: !ruby/object:Gem::Requirement
|
224
|
-
none: false
|
225
209
|
requirements:
|
226
|
-
- -
|
210
|
+
- - ">="
|
227
211
|
- !ruby/object:Gem::Version
|
228
212
|
version: '0'
|
229
213
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
230
|
-
none: false
|
231
214
|
requirements:
|
232
|
-
- -
|
215
|
+
- - ">="
|
233
216
|
- !ruby/object:Gem::Version
|
234
217
|
version: '0'
|
235
218
|
requirements: []
|
236
219
|
rubyforge_project:
|
237
|
-
rubygems_version:
|
220
|
+
rubygems_version: 2.4.7
|
238
221
|
signing_key:
|
239
|
-
specification_version:
|
222
|
+
specification_version: 4
|
240
223
|
summary: An utility to fetch various statistics from Elasticsearch.
|
241
224
|
test_files:
|
242
225
|
- spec/elastic/stats/elastic_client_spec.rb
|
243
226
|
- spec/elastic/stats/ks_spec.rb
|
227
|
+
- spec/elastic/stats/naive-bayes/predictor_spec.rb
|
228
|
+
- spec/elastic/stats/naive-bayes/set_spec.rb
|
229
|
+
- spec/elastic/stats/naive-bayes/token_stats_spec.rb
|
244
230
|
- spec/fixtures/basic_search_request.json
|
245
231
|
- spec/fixtures/successful_search.json
|
246
232
|
- spec/helpers/utility.rb
|