elastic-stats 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +29 -3
- data/lib/elastic/stats/elastic_client.rb +9 -0
- data/lib/elastic/stats/ks.rb +16 -13
- data/lib/elastic/stats/naive-bayes.rb +9 -0
- data/lib/elastic/stats/naive-bayes/predictor.rb +43 -0
- data/lib/elastic/stats/naive-bayes/set.rb +103 -0
- data/lib/elastic/stats/naive-bayes/token_stats.rb +64 -0
- data/lib/elastic/stats/version.rb +1 -1
- data/spec/elastic/stats/elastic_client_spec.rb +46 -29
- data/spec/elastic/stats/ks_spec.rb +26 -3
- data/spec/elastic/stats/naive-bayes/predictor_spec.rb +12 -0
- data/spec/elastic/stats/naive-bayes/set_spec.rb +14 -0
- data/spec/elastic/stats/naive-bayes/token_stats_spec.rb +66 -0
- data/spec/spec_helper.rb +2 -1
- metadata +41 -55
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 14b0b8911410832cfbb96d6d872be82f87a52091
|
4
|
+
data.tar.gz: 247ff87c96e46487e2078e509122efc518bbf760
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ccbbe4e10c740ece220fd8942b1cac41c3d8ce0e86ce6f34ab569d59a3671c5f5f0a5060232d32006cdc391dac0388f56e952c2e65efe4484fdf73a76dcfa8b5
|
7
|
+
data.tar.gz: e63a3e56d5b01c4c8b2ba10aee7ae674386677437f795b9e6689bcc4913f40b4d75514e71ae53b5c0998ef2585aa1cd4edb6f33bf120a12ffd6dac4d70c22826
|
data/README.md
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# Elastic::Stats
|
2
2
|
|
3
|
-
TODO: Write a gem description
|
4
|
-
|
5
3
|
## Installation
|
6
4
|
|
7
5
|
Add this line to your application's Gemfile:
|
@@ -18,7 +16,35 @@ Or install it yourself as:
|
|
18
16
|
|
19
17
|
## Usage
|
20
18
|
|
21
|
-
|
19
|
+
### KS
|
20
|
+
|
21
|
+
~~~
|
22
|
+
require 'elastic/stats/ks'
|
23
|
+
require 'logger'
|
24
|
+
|
25
|
+
# Set the URL
|
26
|
+
ENV['ELASTICSEARCH_URL'] = 'https://test.eagerelk.com/'
|
27
|
+
# This is perfect for logstash stats
|
28
|
+
stats = Elastic::Stats::KS.new 'logstash-2015.03.05'
|
29
|
+
# Set some client options to enable logging and debugging
|
30
|
+
stats.client_options = {
|
31
|
+
debug: true,
|
32
|
+
logger: Logger.new(STDOUT),
|
33
|
+
request_body: true,
|
34
|
+
transport_options: {
|
35
|
+
ssl: { verify: false }
|
36
|
+
}
|
37
|
+
}
|
38
|
+
# Add extra filters to the query
|
39
|
+
stats.query = { filtered: { filter: { fquery: { query: { query_string:{ query: "type:(\"crimson_db\")"}}}}}}
|
40
|
+
|
41
|
+
# Fetch and output the stats
|
42
|
+
puts stats.fetch.inspect
|
43
|
+
~~~
|
44
|
+
|
45
|
+
### Naive Bayes Filter
|
46
|
+
|
47
|
+
TODO
|
22
48
|
|
23
49
|
## Contributing
|
24
50
|
|
@@ -5,6 +5,7 @@ module Elastic
|
|
5
5
|
# Module to set up and manage the Elasticsearch client
|
6
6
|
module ElasticClient
|
7
7
|
attr_writer :client
|
8
|
+
attr_accessor :index, :type
|
8
9
|
|
9
10
|
def client
|
10
11
|
@client ||= Elasticsearch::Client.new client_options
|
@@ -18,6 +19,14 @@ module Elastic
|
|
18
19
|
client_options.update(options)
|
19
20
|
end
|
20
21
|
|
22
|
+
def search(options = {})
|
23
|
+
client.search({ index: index, type: type }.merge(options))
|
24
|
+
end
|
25
|
+
|
26
|
+
def analyze(options = {})
|
27
|
+
client.indices.analyze({ index: index, type: type }.merge(options))
|
28
|
+
end
|
29
|
+
|
21
30
|
private
|
22
31
|
|
23
32
|
def default_options
|
data/lib/elastic/stats/ks.rb
CHANGED
@@ -9,8 +9,7 @@ module Elastic
|
|
9
9
|
class KS
|
10
10
|
include ElasticClient
|
11
11
|
|
12
|
-
attr_accessor :logger
|
13
|
-
attr_writer :debug, :query
|
12
|
+
attr_accessor :logger, :query
|
14
13
|
attr_reader :indices, :to, :from, :span, :interval, :field
|
15
14
|
|
16
15
|
MULTIPLIERS = {
|
@@ -34,8 +33,7 @@ module Elastic
|
|
34
33
|
@field = options.delete(:field)
|
35
34
|
@offset = options.delete(:offset)
|
36
35
|
|
37
|
-
@indices = [indices]
|
38
|
-
@to = @to.to_i if @to.respond_to?(:to_i)
|
36
|
+
@indices = [indices] unless @indices.is_a? Array
|
39
37
|
@from = @to - @span
|
40
38
|
end
|
41
39
|
|
@@ -63,26 +61,31 @@ module Elastic
|
|
63
61
|
)
|
64
62
|
end
|
65
63
|
|
64
|
+
private
|
65
|
+
|
66
66
|
def range(from, to)
|
67
67
|
Hashie::Mash.new(
|
68
|
-
client.search index: indices.join(','), body:
|
68
|
+
client.search index: indices.join(','), body: body(from, to)
|
69
69
|
).aggregations.hits_per_minute.buckets.collect(&:doc_count)
|
70
70
|
end
|
71
71
|
|
72
|
-
def
|
73
|
-
|
74
|
-
|
72
|
+
def body(from, to)
|
73
|
+
body = Hashie::Mash.new
|
74
|
+
body.query = query if query
|
75
|
+
body.aggregations!.hits_per_minute!.date_histogram = aggregate(from, to)
|
76
|
+
body
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def aggregate(from, to)
|
82
|
+
{
|
75
83
|
field: field, interval: interval, min_doc_count: 0,
|
76
84
|
extended_bounds: {
|
77
85
|
min: (from * 1000),
|
78
86
|
max: (to * 1000)
|
79
87
|
}
|
80
88
|
}
|
81
|
-
@query
|
82
|
-
end
|
83
|
-
|
84
|
-
def debug?
|
85
|
-
@debug ||= ENV['DEBUG']
|
86
89
|
end
|
87
90
|
|
88
91
|
private
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'elastic/stats/elastic_client'
|
2
|
+
require 'elastic/stats/naive-bayes/token_stats'
|
3
|
+
|
4
|
+
module Elastic
|
5
|
+
module Stats
|
6
|
+
module NaiveBayes
|
7
|
+
# Utility to perform Naive Bayes category predictions on text
|
8
|
+
class Predictor
|
9
|
+
include ElasticClient
|
10
|
+
|
11
|
+
attr_reader :prior_set
|
12
|
+
attr_writer :adjust
|
13
|
+
|
14
|
+
def initialize(prior_set)
|
15
|
+
@prior_set = prior_set
|
16
|
+
end
|
17
|
+
|
18
|
+
def guess(subject)
|
19
|
+
scores = {}
|
20
|
+
prior_set.categories.keys.each do |category|
|
21
|
+
scores[category] = score(subject, category)
|
22
|
+
end
|
23
|
+
Hash[scores.sort_by { |label, score| -score }]
|
24
|
+
end
|
25
|
+
|
26
|
+
def score(subject, category)
|
27
|
+
# Calculate the probability for each token in this category
|
28
|
+
log_sum = tokenize(subject).reduce(0) do |sum, token|
|
29
|
+
stats = TokenStats.new(token, prior_set)
|
30
|
+
sum + stats.bayes(category)
|
31
|
+
end
|
32
|
+
|
33
|
+
1 / (1 + Math.exp(log_sum))
|
34
|
+
end
|
35
|
+
|
36
|
+
def tokenize(subject)
|
37
|
+
@tokenize ||= Hash.new { |h, k| h[k] = prior_set.tokenize k }
|
38
|
+
@tokenize[subject]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
require 'elastic/stats/elastic_client'
|
3
|
+
|
4
|
+
module Elastic
|
5
|
+
module Stats
|
6
|
+
module NaiveBayes
|
7
|
+
# A set of documents against which statistics will be calculated
|
8
|
+
class Set
|
9
|
+
include ElasticClient
|
10
|
+
|
11
|
+
attr_reader :category_field, :subject_field, :index, :type
|
12
|
+
|
13
|
+
def initialize(index, type, category_field, subject_field)
|
14
|
+
@index = index
|
15
|
+
@type = type
|
16
|
+
@category_field = category_field
|
17
|
+
@subject_field = subject_field
|
18
|
+
end
|
19
|
+
|
20
|
+
def count
|
21
|
+
init_stats if @count.nil?
|
22
|
+
@count
|
23
|
+
end
|
24
|
+
|
25
|
+
def categories
|
26
|
+
init_stats if @categories.nil?
|
27
|
+
@categories
|
28
|
+
end
|
29
|
+
|
30
|
+
def tokens
|
31
|
+
@tokens ||= Hash.new { |h, k| h[k] = count_search[k]['hits']['total'] }
|
32
|
+
end
|
33
|
+
|
34
|
+
def token_categories
|
35
|
+
@token_categories ||= Hash.new do |h, k|
|
36
|
+
result = count_search[k]['aggregations']['counts']['buckets'].map do |bucket|
|
37
|
+
{ bucket['key'] => bucket['doc_count'] }
|
38
|
+
end
|
39
|
+
h[k] = Hash.new(0).merge(result.reduce(:merge))
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def tokenize(subject)
|
44
|
+
results = analyze field: subject_field, text: subject
|
45
|
+
results['tokens'].collect { |x| x['token'] }
|
46
|
+
end
|
47
|
+
|
48
|
+
# Elasticsearch client helper methods
|
49
|
+
def search(options = {})
|
50
|
+
client.search({ index: index, type: type }.merge(options))
|
51
|
+
end
|
52
|
+
|
53
|
+
def analyze(options = {})
|
54
|
+
client.indices.analyze({ index: index }.merge(options))
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def init_stats
|
60
|
+
results = Hashie::Mash.new(
|
61
|
+
search(search_type: 'count', body: aggregation)
|
62
|
+
)
|
63
|
+
|
64
|
+
@count = results.hits.total
|
65
|
+
@categories = results.aggregations.counts.buckets.map do |bucket|
|
66
|
+
{ bucket['key'] => bucket['doc_count'] }
|
67
|
+
end
|
68
|
+
@categories = @categories.reduce(:merge)
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def count_search
|
74
|
+
@count_search ||= Hash.new{ |h, k| h[k] = search search_type: 'count', body: token_query(k) }
|
75
|
+
end
|
76
|
+
|
77
|
+
private
|
78
|
+
|
79
|
+
def aggregation
|
80
|
+
{
|
81
|
+
aggs: {
|
82
|
+
counts: {
|
83
|
+
terms: {
|
84
|
+
field: category_field,
|
85
|
+
size: 200 # We're assuming there are fewer than 200 categories
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
def token_query(token)
|
95
|
+
body = Hashie::Mash.new
|
96
|
+
body.query!.filtered!.filter!.term!
|
97
|
+
body.query.filtered.filter.term[subject_field] = token
|
98
|
+
body.merge aggregation
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module Elastic
|
2
|
+
module Stats
|
3
|
+
module NaiveBayes
|
4
|
+
# Provide statistics about a token in a specific set of data
|
5
|
+
class TokenStats
|
6
|
+
attr_reader :token, :set
|
7
|
+
|
8
|
+
def initialize(token, set)
|
9
|
+
@token = token
|
10
|
+
@set = set
|
11
|
+
end
|
12
|
+
|
13
|
+
# Returns the number of documents that contain the token
|
14
|
+
def count
|
15
|
+
set.tokens[token]
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns the categories associated with the token in the set as a Hash
|
19
|
+
def categories
|
20
|
+
set.token_categories[token]
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns the probability that a token is in the specified category
|
24
|
+
def probability(category)
|
25
|
+
return 0 unless categories.has_key? category
|
26
|
+
return 0 if set.categories[category] == 0
|
27
|
+
categories[category] / set.categories[category].to_f
|
28
|
+
end
|
29
|
+
|
30
|
+
# Returns the inverse probability that a token is in the category
|
31
|
+
def inverse(category)
|
32
|
+
return 0 unless categories.has_key? category
|
33
|
+
return 0 if (set.count - set.categories[category]) == 0
|
34
|
+
(count - categories[category]) / \
|
35
|
+
(set.count - set.categories[category]).to_f
|
36
|
+
end
|
37
|
+
|
38
|
+
def bayes(category)
|
39
|
+
return 0 if count == 0
|
40
|
+
return 0 if (probability(category) + inverse(category)) == 0
|
41
|
+
calculated = log_protect(
|
42
|
+
probability(category) / (probability(category) + inverse(category))
|
43
|
+
)
|
44
|
+
adjust(calculated)
|
45
|
+
Math.log(1 - calculated) - Math.log(calculated)
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def adjust(probability, weight = 1, target = 0.5)
|
51
|
+
((weight * target) + (count * probability)) / (1 + count)
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def log_protect(probability)
|
57
|
+
return 0.0001 if probability == 0
|
58
|
+
return 0.9999 if probability == 1
|
59
|
+
probability
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -8,44 +8,61 @@ end
|
|
8
8
|
|
9
9
|
describe Elastic::Stats::ElasticClient do
|
10
10
|
subject { ElasticClientTest.new }
|
11
|
-
it 'allows the client to be set' do
|
12
|
-
client = Object.new
|
13
|
-
subject.client = client
|
14
11
|
|
15
|
-
|
16
|
-
|
12
|
+
context '#client' do
|
13
|
+
it 'allows the client to be set' do
|
14
|
+
client = Object.new
|
15
|
+
subject.client = client
|
17
16
|
|
18
|
-
|
19
|
-
|
20
|
-
url: ENV['ELASTICSEARCH_URL']
|
21
|
-
)
|
17
|
+
expect(subject.client).to be client
|
18
|
+
end
|
22
19
|
end
|
23
20
|
|
24
|
-
|
25
|
-
|
21
|
+
context '#client_options' do
|
22
|
+
it 'has sane default options' do
|
23
|
+
expect(subject.client_options).to eq(
|
24
|
+
url: ENV['ELASTICSEARCH_URL']
|
25
|
+
)
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'allows the client options to be retrieved' do
|
29
|
+
expect(subject.client_options).to eq(url: ENV['ELASTICSEARCH_URL'])
|
30
|
+
end
|
26
31
|
end
|
27
32
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
33
|
+
context '#client_options=' do
|
34
|
+
it 'allows the default client options to be added' do
|
35
|
+
logger = Object.new
|
36
|
+
options = {
|
37
|
+
debug: true,
|
38
|
+
logger: logger
|
39
|
+
}
|
40
|
+
subject.client_options = options
|
41
|
+
|
42
|
+
options.update(url: ENV['ELASTICSEARCH_URL'])
|
43
|
+
expect(subject.client_options).to eq options
|
44
|
+
end
|
35
45
|
|
36
|
-
options
|
37
|
-
|
46
|
+
it 'allows the default client options to be overriden' do
|
47
|
+
logger = Object.new
|
48
|
+
options = {
|
49
|
+
debug: true,
|
50
|
+
logger: logger,
|
51
|
+
url: 'http://mytesturl.com:9200/'
|
52
|
+
}
|
53
|
+
subject.client_options = options
|
54
|
+
|
55
|
+
expect(subject.client_options).to eq options
|
56
|
+
end
|
38
57
|
end
|
39
58
|
|
40
|
-
|
41
|
-
|
42
|
-
options
|
43
|
-
|
44
|
-
logger: logger,
|
45
|
-
url: 'http://mytesturl.com:9200/'
|
46
|
-
}
|
47
|
-
subject.client_options = options
|
59
|
+
context '#search' do
|
60
|
+
it 'sends a search request using the specified type and index'
|
61
|
+
it 'sends a search request using the passed in options'
|
62
|
+
end
|
48
63
|
|
49
|
-
|
64
|
+
context '#analyze' do
|
65
|
+
it 'sends an analyze request using the specified type and index'
|
66
|
+
it 'sends an analyze request using the passed in options'
|
50
67
|
end
|
51
68
|
end
|
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'elastic/stats/ks'
|
3
|
-
require 'webmock'
|
4
3
|
|
5
4
|
describe Elastic::Stats::KS do
|
6
5
|
it 'has an array of indices' do
|
@@ -26,13 +25,21 @@ describe Elastic::Stats::KS do
|
|
26
25
|
field: '@mytimefield'
|
27
26
|
}
|
28
27
|
ks = Elastic::Stats::KS.new('logstash-2015.12.12', options)
|
29
|
-
expect(ks.to).to eq now
|
30
|
-
expect(ks.from).to eq(ks.to
|
28
|
+
expect(ks.to).to eq now
|
29
|
+
expect(ks.from).to eq(ks.to - (60 * 60 * 24))
|
31
30
|
expect(ks.interval).to eq '5m'
|
32
31
|
expect(ks.field).to eq '@mytimefield'
|
33
32
|
end
|
34
33
|
|
35
34
|
context 'fetch' do
|
35
|
+
before(:all) do
|
36
|
+
WebMock.enable!
|
37
|
+
end
|
38
|
+
|
39
|
+
after(:all) do
|
40
|
+
WebMock.disable!
|
41
|
+
end
|
42
|
+
|
36
43
|
subject do
|
37
44
|
WebMock.stub_request(:get, 'http://localhost:9200/fake/_search')
|
38
45
|
.to_return(
|
@@ -68,4 +75,20 @@ describe Elastic::Stats::KS do
|
|
68
75
|
expect(subject[:comparison]).to eq 0.9616652224137048
|
69
76
|
end
|
70
77
|
end
|
78
|
+
|
79
|
+
context 'query' do
|
80
|
+
it 'has no default query' do
|
81
|
+
ks = Elastic::Stats::KS.new('logstash-2015.12.12')
|
82
|
+
|
83
|
+
expect(ks.query).to be_nil
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'allows for the query to be set' do
|
87
|
+
ks = Elastic::Stats::KS.new('logstash-2015.12.12')
|
88
|
+
query = { 'term' => { 'user' => 'eagerelk' } }
|
89
|
+
ks.query = query
|
90
|
+
|
91
|
+
expect(ks.query).to eq query
|
92
|
+
end
|
93
|
+
end
|
71
94
|
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'elastic/stats/naive-bayes/set'
|
3
|
+
|
4
|
+
describe Elastic::Stats::NaiveBayes::Set do
|
5
|
+
subject do
|
6
|
+
Elastic::Stats::NaiveBayes::Set.new('transactions', 'training', 'category')
|
7
|
+
end
|
8
|
+
|
9
|
+
context '#tokens' do
|
10
|
+
it 'works'
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'elastic/stats/naive-bayes/set'
|
3
|
+
|
4
|
+
describe Elastic::Stats::NaiveBayes::Set do
|
5
|
+
subject do
|
6
|
+
Elastic::Stats::NaiveBayes::Set.new(
|
7
|
+
'transactions', 'training', 'category', 'subject'
|
8
|
+
)
|
9
|
+
end
|
10
|
+
|
11
|
+
context '#tokens' do
|
12
|
+
it 'works'
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'elastic/stats/naive-bayes/token_stats'
|
3
|
+
|
4
|
+
# Testing Set
|
5
|
+
class TestSet
|
6
|
+
def count
|
7
|
+
10
|
8
|
+
end
|
9
|
+
|
10
|
+
def categories
|
11
|
+
{
|
12
|
+
afrikaans: 4,
|
13
|
+
english: 5,
|
14
|
+
french: 1
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
def tokens
|
19
|
+
{
|
20
|
+
'is' => 7
|
21
|
+
}
|
22
|
+
end
|
23
|
+
|
24
|
+
def token_categories
|
25
|
+
{
|
26
|
+
'is' => {
|
27
|
+
afrikaans: 4,
|
28
|
+
english: 4
|
29
|
+
}
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
def tokenize(subject)
|
34
|
+
subject.downcase.split
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def subject
|
39
|
+
@subject ||= Elastic::Stats::NaiveBayes::TokenStats.new 'is', TestSet.new
|
40
|
+
end
|
41
|
+
|
42
|
+
describe Elastic::Stats::NaiveBayes::TokenStats do
|
43
|
+
subject do
|
44
|
+
TokenStats.new 'is', TestSet.new
|
45
|
+
end
|
46
|
+
|
47
|
+
context '#count' do
|
48
|
+
it 'returns the number of documents that contains the token'
|
49
|
+
it 'returns the categories as an integer'
|
50
|
+
end
|
51
|
+
|
52
|
+
context '#categories' do
|
53
|
+
it 'returns the categories associated with the token in the set'
|
54
|
+
it 'returns the categories as a Hash'
|
55
|
+
end
|
56
|
+
|
57
|
+
context '#probability' do
|
58
|
+
it 'returns the probability that a token is in the specified category'
|
59
|
+
it 'returns the probability as a float'
|
60
|
+
end
|
61
|
+
|
62
|
+
context '#inverse' do
|
63
|
+
it 'returns the inverse probability that a token is in the category'
|
64
|
+
it 'returns the probability as a float'
|
65
|
+
end
|
66
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,190 +1,167 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elastic-stats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.2
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jugrens du Toit
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2015-
|
11
|
+
date: 2015-05-27 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.6'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '1.6'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rspec
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - ">="
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - ">="
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: guard
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - ">="
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - ">="
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: guard-rspec
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- -
|
59
|
+
- - ">="
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: '0'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- -
|
66
|
+
- - ">="
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0'
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
70
|
name: rubocop
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
|
-
- -
|
73
|
+
- - ">="
|
84
74
|
- !ruby/object:Gem::Version
|
85
75
|
version: '0'
|
86
76
|
type: :development
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
|
-
- -
|
80
|
+
- - ">="
|
92
81
|
- !ruby/object:Gem::Version
|
93
82
|
version: '0'
|
94
83
|
- !ruby/object:Gem::Dependency
|
95
84
|
name: rake
|
96
85
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
86
|
requirements:
|
99
|
-
- -
|
87
|
+
- - ">="
|
100
88
|
- !ruby/object:Gem::Version
|
101
89
|
version: '0'
|
102
90
|
type: :development
|
103
91
|
prerelease: false
|
104
92
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
93
|
requirements:
|
107
|
-
- -
|
94
|
+
- - ">="
|
108
95
|
- !ruby/object:Gem::Version
|
109
96
|
version: '0'
|
110
97
|
- !ruby/object:Gem::Dependency
|
111
98
|
name: webmock
|
112
99
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
100
|
requirements:
|
115
|
-
- -
|
101
|
+
- - ">="
|
116
102
|
- !ruby/object:Gem::Version
|
117
103
|
version: '0'
|
118
104
|
type: :development
|
119
105
|
prerelease: false
|
120
106
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
107
|
requirements:
|
123
|
-
- -
|
108
|
+
- - ">="
|
124
109
|
- !ruby/object:Gem::Version
|
125
110
|
version: '0'
|
126
111
|
- !ruby/object:Gem::Dependency
|
127
112
|
name: codeclimate-test-reporter
|
128
113
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
114
|
requirements:
|
131
|
-
- -
|
115
|
+
- - ">="
|
132
116
|
- !ruby/object:Gem::Version
|
133
117
|
version: '0'
|
134
118
|
type: :development
|
135
119
|
prerelease: false
|
136
120
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
121
|
requirements:
|
139
|
-
- -
|
122
|
+
- - ">="
|
140
123
|
- !ruby/object:Gem::Version
|
141
124
|
version: '0'
|
142
125
|
- !ruby/object:Gem::Dependency
|
143
126
|
name: hashie
|
144
127
|
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
128
|
requirements:
|
147
|
-
- -
|
129
|
+
- - ">="
|
148
130
|
- !ruby/object:Gem::Version
|
149
131
|
version: '0'
|
150
132
|
type: :runtime
|
151
133
|
prerelease: false
|
152
134
|
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
135
|
requirements:
|
155
|
-
- -
|
136
|
+
- - ">="
|
156
137
|
- !ruby/object:Gem::Version
|
157
138
|
version: '0'
|
158
139
|
- !ruby/object:Gem::Dependency
|
159
140
|
name: statsample
|
160
141
|
requirement: !ruby/object:Gem::Requirement
|
161
|
-
none: false
|
162
142
|
requirements:
|
163
|
-
- -
|
143
|
+
- - ">="
|
164
144
|
- !ruby/object:Gem::Version
|
165
145
|
version: '0'
|
166
146
|
type: :runtime
|
167
147
|
prerelease: false
|
168
148
|
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
none: false
|
170
149
|
requirements:
|
171
|
-
- -
|
150
|
+
- - ">="
|
172
151
|
- !ruby/object:Gem::Version
|
173
152
|
version: '0'
|
174
153
|
- !ruby/object:Gem::Dependency
|
175
154
|
name: elasticsearch
|
176
155
|
requirement: !ruby/object:Gem::Requirement
|
177
|
-
none: false
|
178
156
|
requirements:
|
179
|
-
- -
|
157
|
+
- - ">="
|
180
158
|
- !ruby/object:Gem::Version
|
181
159
|
version: '0'
|
182
160
|
type: :runtime
|
183
161
|
prerelease: false
|
184
162
|
version_requirements: !ruby/object:Gem::Requirement
|
185
|
-
none: false
|
186
163
|
requirements:
|
187
|
-
- -
|
164
|
+
- - ">="
|
188
165
|
- !ruby/object:Gem::Version
|
189
166
|
version: '0'
|
190
167
|
description:
|
@@ -194,8 +171,8 @@ executables: []
|
|
194
171
|
extensions: []
|
195
172
|
extra_rdoc_files: []
|
196
173
|
files:
|
197
|
-
- .gitignore
|
198
|
-
- .ruby-version
|
174
|
+
- ".gitignore"
|
175
|
+
- ".ruby-version"
|
199
176
|
- Gemfile
|
200
177
|
- Guardfile
|
201
178
|
- LICENSE
|
@@ -206,9 +183,16 @@ files:
|
|
206
183
|
- lib/elastic/stats.rb
|
207
184
|
- lib/elastic/stats/elastic_client.rb
|
208
185
|
- lib/elastic/stats/ks.rb
|
186
|
+
- lib/elastic/stats/naive-bayes.rb
|
187
|
+
- lib/elastic/stats/naive-bayes/predictor.rb
|
188
|
+
- lib/elastic/stats/naive-bayes/set.rb
|
189
|
+
- lib/elastic/stats/naive-bayes/token_stats.rb
|
209
190
|
- lib/elastic/stats/version.rb
|
210
191
|
- spec/elastic/stats/elastic_client_spec.rb
|
211
192
|
- spec/elastic/stats/ks_spec.rb
|
193
|
+
- spec/elastic/stats/naive-bayes/predictor_spec.rb
|
194
|
+
- spec/elastic/stats/naive-bayes/set_spec.rb
|
195
|
+
- spec/elastic/stats/naive-bayes/token_stats_spec.rb
|
212
196
|
- spec/fixtures/basic_search_request.json
|
213
197
|
- spec/fixtures/successful_search.json
|
214
198
|
- spec/helpers/utility.rb
|
@@ -216,31 +200,33 @@ files:
|
|
216
200
|
homepage: https://github.com/eagerelk/elastic-stats-ruby
|
217
201
|
licenses:
|
218
202
|
- MIT
|
203
|
+
metadata: {}
|
219
204
|
post_install_message:
|
220
205
|
rdoc_options: []
|
221
206
|
require_paths:
|
222
207
|
- lib
|
223
208
|
required_ruby_version: !ruby/object:Gem::Requirement
|
224
|
-
none: false
|
225
209
|
requirements:
|
226
|
-
- -
|
210
|
+
- - ">="
|
227
211
|
- !ruby/object:Gem::Version
|
228
212
|
version: '0'
|
229
213
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
230
|
-
none: false
|
231
214
|
requirements:
|
232
|
-
- -
|
215
|
+
- - ">="
|
233
216
|
- !ruby/object:Gem::Version
|
234
217
|
version: '0'
|
235
218
|
requirements: []
|
236
219
|
rubyforge_project:
|
237
|
-
rubygems_version:
|
220
|
+
rubygems_version: 2.4.7
|
238
221
|
signing_key:
|
239
|
-
specification_version:
|
222
|
+
specification_version: 4
|
240
223
|
summary: A utility to fetch various statistics from Elasticsearch.
|
241
224
|
test_files:
|
242
225
|
- spec/elastic/stats/elastic_client_spec.rb
|
243
226
|
- spec/elastic/stats/ks_spec.rb
|
227
|
+
- spec/elastic/stats/naive-bayes/predictor_spec.rb
|
228
|
+
- spec/elastic/stats/naive-bayes/set_spec.rb
|
229
|
+
- spec/elastic/stats/naive-bayes/token_stats_spec.rb
|
244
230
|
- spec/fixtures/basic_search_request.json
|
245
231
|
- spec/fixtures/successful_search.json
|
246
232
|
- spec/helpers/utility.rb
|