elasticated 1.2.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +123 -1
  3. data/elasticated.gemspec +1 -0
  4. data/lib/elasticated.rb +18 -3
  5. data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
  6. data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
  7. data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
  8. data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
  9. data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
  10. data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
  11. data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
  12. data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
  13. data/lib/elasticated/aggregations/subaggregated.rb +10 -1
  14. data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
  15. data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
  16. data/lib/elasticated/configurable.rb +13 -2
  17. data/lib/elasticated/configuration.rb +6 -0
  18. data/lib/elasticated/document.rb +2 -1
  19. data/lib/elasticated/helpers.rb +18 -0
  20. data/lib/elasticated/loggers/default_logger.rb +27 -0
  21. data/lib/elasticated/loggers/silent_logger.rb +27 -0
  22. data/lib/elasticated/query.rb +8 -0
  23. data/lib/elasticated/query_aggregations.rb +3 -4
  24. data/lib/elasticated/repository.rb +31 -30
  25. data/lib/elasticated/repository/intelligent_search.rb +46 -0
  26. data/lib/elasticated/repository/normal_search.rb +40 -0
  27. data/lib/elasticated/repository/resumable_search.rb +58 -0
  28. data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
  29. data/lib/elasticated/repository/scroll_search.rb +45 -0
  30. data/lib/elasticated/repository/search.rb +45 -0
  31. data/lib/elasticated/repository/single_page_search.rb +13 -0
  32. data/lib/elasticated/results.rb +43 -25
  33. data/lib/version.rb +11 -1
  34. data/spec/aggregation_spec.rb +58 -32
  35. data/spec/document_spec.rb +4 -4
  36. data/spec/intelligent_search_spec.rb +88 -0
  37. data/spec/query_spec.rb +2 -2
  38. data/spec/results_spec.rb +9 -9
  39. metadata +38 -5
  40. data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
  41. data/lib/elasticated/default_logger.rb +0 -27
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 38febd11375f1593d887dfa64c008e6b559e4b77
4
- data.tar.gz: 6cfaa6521ae4d55a7ed9ca37cc415acc33c02cf8
3
+ metadata.gz: 7c6d361a271487eaaa00bad98fc637d71577f84f
4
+ data.tar.gz: a1ad71c40b64d1681fe08558eaf0a6b7d19e49e2
5
5
  SHA512:
6
- metadata.gz: 4c0663916c7c1b3137c42065d5e51cc2dee51d4b70e53b2ae4ce4a8f199be8830c4fc4eea8da1e05b4409230b32baed127fcbbde692958351fea2cf4af9a0000
7
- data.tar.gz: 853799f2868ad8a545ee100558195d421c9c591f6fd7e71a1a0d5c1698b313edb5f4513e62f6600a9c06d0eec211efb29c05e48063ab0683ab91cadbf423b030
6
+ metadata.gz: c3c712d491f5846c8445765b5532fc0c3de738a429be5d00e1bbd6c11f1812e711eaaaa6facf0edad4a6563570aa243b15e748652ebbc82d0aebd3d1d329eacc
7
+ data.tar.gz: 40d195a39ecea6dc083487956a8442b69f7cf27e5947274a22f55c5fce64640f2708a53e16a3acd77fade4bb2e32cd38d4cff420e953d200293d28b5156cb524
data/README.md CHANGED
@@ -1,3 +1,125 @@
1
- # elasticated
1
+ # Elasticated
2
2
 
3
3
  Elasticsearch Wrapper, with Query & Mapping Builders
4
+
5
+ **HOW TO** instantiate a repository
6
+ ```ruby
7
+ repository = Elasticated::Repository.new # pointing to localhost
8
+ repository = Elasticated::Repository.new host: 'http://user:pass@192.168.1.2:9200' # pointing to some secured server
9
+ repository = Elasticated::Repository.new host: 'myhost', index: 'myindex', type: 'mytype' # pointing to some type of some index
10
+ ```
11
+
12
+ **HOW TO** build a query
13
+ ```ruby
14
+ query = Query.build do
15
+ filter do
16
+ equal :first_name, 'Pablo'
17
+ end
18
+ conditions do
19
+ between :age, 20, 25
20
+ must_not do
21
+ wildcard :second_name, 'Santiago*'
22
+ end
23
+ should do
24
+ gt :age, 23
25
+ lt :age, 22
26
+ end
27
+ minimum_should_match 1
28
+ end
29
+ post do
30
+ equal :city, 'CABA'
31
+ end
32
+ aggregations do
33
+ group :register_number, size: 10
34
+ end
35
+ from 5
36
+ size 15
37
+ end
38
+ query.build # see the result
39
+ ```
40
+
41
+ **HOW TO** execute a search
42
+ ```ruby
43
+ repository = Repository.new
44
+ query = Query.new
45
+ repository.execute_count query
46
+ repository.execute_search query
47
+ repository.execute_aggregations query
48
+ repository.execute_aggregated_search query
49
+ repository.delete_by query
50
+ repository.exists? query
51
+ ```
52
+
53
+ **HOW TO** build a document, index or update it
54
+ ```ruby
55
+ document = Document.create do |doc|
56
+ doc.id = 'my_unique_id'
57
+ doc.index = 'myindex'
58
+ doc.type = 'mytype'
59
+ doc.source = { user: 'Pablo', some_field: 'some_value' }
60
+ end
61
+ repository = Repository.new
62
+ repository.index_document document
63
+ repository.update_document document
64
+ ```
65
+
66
+ **HOW TO** start a *resumable* scroll
67
+ ```ruby
68
+ repository = Elasticated::Repository.new
69
+ query = Elasticated::Query.new
70
+ # prepare the 'search' object
71
+ search = repository.prepare_search query, index: 'my_index', type: 'my_type'
72
+ # fetch the first page of results
73
+ results = search.start
74
+ # get the scroll_id, and use it again later
75
+ scroll_id = search.scroll_id # also results.scroll_id is valid
76
+ ```
77
+
78
+ **HOW TO** resume a scroll
79
+ ```ruby
80
+ repository = Elasticated::Repository.new
81
+ scroll_id = '...' # the scroll_id returned by the 'search' object
82
+ # prepare the 'search' object
83
+ search = repository.restore_search scroll_id
84
+ # fetch the next page of results
85
+ results = search.fetch
86
+ # fetch all pages until the search ends
87
+ results.append search.fetch until search.completed?
88
+ ```
89
+
90
+ **HOW TO** build a mapping
91
+ ```ruby
92
+ mapping = Elasticated::Mapping.build do
93
+ type :content do
94
+ date :date
95
+ string :user
96
+ analyzed_string :user_alias
97
+ nested :user_purchases do
98
+ long :purchase_id
99
+ string :items
100
+ end
101
+ object :user_info do
102
+ string :address
103
+ end
104
+ end
105
+ end
106
+ ```
107
+
108
+ **HOW TO** configure the gem
109
+ ```ruby
110
+ Elasticated.configure do |config|
111
+ config.logger = Elasticated::Loggers::DefaultLogger.new
112
+ config.scroll_expiration_time = '3m'
113
+ config.scroll_page_size = 500
114
+ config.search_page_size = 1000
115
+ end
116
+ ```
117
+
118
+ **HOW TO** configure some specific repository
119
+ ```ruby
120
+ repository = Repository.new
121
+ repository.logger = Elasticated::Loggers::DefaultLogger.new
122
+ repository.scroll_expiration_time = '3m'
123
+ repository.scroll_page_size = 500
124
+ repository.search_page_size = 1000
125
+ ```
data/elasticated.gemspec CHANGED
@@ -25,5 +25,6 @@ Gem::Specification.new do |spec|
25
25
 
26
26
  spec.add_runtime_dependency 'elasticsearch'
27
27
  spec.add_runtime_dependency 'hash_ext', '~> 0.1.1'
28
+ spec.add_runtime_dependency 'timing', '~> 0.0', '>= 0.0.9'
28
29
 
29
30
  end
data/lib/elasticated.rb CHANGED
@@ -1,6 +1,10 @@
1
1
  require 'json'
2
2
  require 'hash_ext'
3
3
  require 'elasticsearch'
4
+ require 'timing'
5
+
6
+ require_relative 'elasticated/loggers/silent_logger'
7
+ require_relative 'elasticated/loggers/default_logger'
4
8
 
5
9
  require_relative 'elasticated/helpers'
6
10
  require_relative 'elasticated/block_evaluation'
@@ -8,7 +12,6 @@ require_relative 'elasticated/clonable'
8
12
  require_relative 'elasticated/inspectionable'
9
13
  require_relative 'elasticated/configuration'
10
14
  require_relative 'elasticated/configurable'
11
- require_relative 'elasticated/default_logger'
12
15
 
13
16
  # query conditions
14
17
 
@@ -48,15 +51,20 @@ require_relative 'elasticated/aggregations/terms_aggregation'
48
51
 
49
52
  require_relative 'elasticated/aggregations/histogram_aggregation'
50
53
  require_relative 'elasticated/aggregations/date_histogram_aggregation'
54
+ require_relative 'elasticated/aggregations/safe_date_histogram_aggregation'
51
55
 
52
56
  require_relative 'elasticated/aggregations/single_value_aggregation'
53
57
  require_relative 'elasticated/aggregations/cardinality_aggregation'
54
- require_relative 'elasticated/aggregations/count_distinct_aggregation'
58
+ require_relative 'elasticated/aggregations/value_count_aggregation'
59
+ require_relative 'elasticated/aggregations/missing_aggregation'
60
+
61
+ require_relative 'elasticated/aggregations/stats_aggregation'
55
62
 
56
63
  require_relative 'elasticated/aggregations/group_aggregation'
57
- require_relative 'elasticated/aggregations/count_aggregation'
58
64
  require_relative 'elasticated/aggregations/sum_distinct_aggregation'
59
65
 
66
+ require_relative 'elasticated/aggregations/geohash_grid_aggregation'
67
+
60
68
  require_relative 'elasticated/aggregations/count_filtered_aggregation'
61
69
  require_relative 'elasticated/aggregations/filter_aggregation_evaluator'
62
70
  require_relative 'elasticated/aggregations/filter_aggregation'
@@ -78,6 +86,13 @@ require_relative 'elasticated/results'
78
86
 
79
87
  require_relative 'elasticated/client'
80
88
  require_relative 'elasticated/repository'
89
+ require_relative 'elasticated/repository/search'
90
+ require_relative 'elasticated/repository/intelligent_search'
91
+ require_relative 'elasticated/repository/single_page_search'
92
+ require_relative 'elasticated/repository/normal_search'
93
+ require_relative 'elasticated/repository/scroll_search'
94
+ require_relative 'elasticated/repository/scan_scroll_search'
95
+ require_relative 'elasticated/repository/resumable_search'
81
96
 
82
97
  require_relative 'elasticated/mapping'
83
98
  require_relative 'elasticated/mapping/partial'
data/lib/elasticated/aggregations/filter_aggregation.rb CHANGED
@@ -2,11 +2,13 @@ module Elasticated
2
2
  class FilterAggregation < Aggregation
3
3
  include Subaggregated
4
4
 
5
- attr_accessor :_evaluator, :_filter_name
5
+ attr_accessor :_evaluator, :_filter_name, :compact, :include_count
6
6
 
7
7
  def initialize(filter_name, *args, &block)
8
- self._filter_name = filter_name
8
+ self._filter_name = filter_name
9
9
  super
10
+ self.compact = extra_params.delete(:compact) { false }
11
+ self.include_count = extra_params.delete(:include_count) { true }
10
12
  initialize_subaggregations FilterAggregationEvaluator.new, &block
11
13
  end
12
14
 
@@ -23,10 +25,15 @@ module Elasticated
23
25
  end
24
26
 
25
27
  def parse(response)
26
- response_body = response
27
- ret = { 'count' => response_body['doc_count'] }
28
- ret.merge! parse_subaggregations(response_body)
29
- ret
28
+ count = response['doc_count']
29
+
30
+ if _subaggregations.empty?
31
+ compact ? count : { 'count' => count }
32
+ else
33
+ parse_subaggregations(response).tap do |h|
34
+ h['count'] = count if include_count
35
+ end
36
+ end
30
37
  end
31
38
 
32
39
  end
data/lib/elasticated/aggregations/geohash_grid_aggregation.rb ADDED
@@ -0,0 +1,28 @@
1
+ module Elasticated
2
+ class GeohashGridAggregation < Aggregation
3
+
4
+ def default_name
5
+ "geohash_grid_by_#{field}"
6
+ end
7
+
8
+ # TODO: this is exactly the same as in SingleValueAggregation
9
+ def build
10
+ operation_info = { field: field }
11
+ operation_info.merge! extra_params
12
+ { operation => operation_info }
13
+ end
14
+
15
+ def parse(response)
16
+ response['buckets'].each_with_object({}) do |bucket, hash|
17
+ hash[bucket['key']] = bucket['doc_count']
18
+ end
19
+ end
20
+
21
+ protected
22
+
23
+ def operation
24
+ :geohash_grid
25
+ end
26
+
27
+ end
28
+ end
data/lib/elasticated/aggregations/group_aggregation.rb CHANGED
@@ -2,8 +2,12 @@ module Elasticated
2
2
  class GroupAggregation < TermsAggregation
3
3
  include Subaggregated
4
4
 
5
+ attr_accessor :compact, :include_count
6
+
5
7
  def initialize(field, *args, &block)
6
8
  super
9
+ self.compact = extra_params.delete(:compact) { false }
10
+ self.include_count = extra_params.delete(:include_count) { true }
7
11
  initialize_subaggregations &block
8
12
  end
9
13
 
@@ -12,16 +16,21 @@ module Elasticated
12
16
  end
13
17
 
14
18
  def build
15
- aggregation_struct = super
16
- aggregation_struct.merge! build_subaggregations
17
- aggregation_struct
19
+ super.merge build_subaggregations
18
20
  end
19
21
 
20
22
  def parse(response)
21
- response['buckets'].inject({}) do |hash, bucket|
22
- bucket_hash = { 'count' => bucket['doc_count'] }
23
- bucket_hash.merge! parse_subaggregations(bucket)
24
- hash.merge bucket['key'] => bucket_hash
23
+ response['buckets'].each_with_object({}) do |bucket, hash|
24
+ count = bucket['doc_count']
25
+
26
+ hash[bucket['key']] =
27
+ if _subaggregations.empty?
28
+ compact ? count : { 'count' => count }
29
+ else
30
+ parse_subaggregations(bucket).tap do |h|
31
+ h['count'] = count if include_count
32
+ end
33
+ end
25
34
  end
26
35
  end
27
36
 
data/lib/elasticated/aggregations/missing_aggregation.rb ADDED
@@ -0,0 +1,34 @@
1
+ module Elasticated
2
+ class MissingAggregation < SingleValueAggregation
3
+ include Subaggregated
4
+
5
+ attr_accessor :compact
6
+
7
+ def initialize(field, *args, &block)
8
+ super
9
+ self.compact = extra_params.delete(:compact) { false }
10
+ initialize_subaggregations &block
11
+ end
12
+
13
+ # implementation
14
+ def operation
15
+ :missing
16
+ end
17
+
18
+ # TODO: this is exactly the same as in GroupAggregation
19
+ def build
20
+ super.merge build_subaggregations
21
+ end
22
+
23
+ def parse(response)
24
+ _subaggregations.empty? ? super : parse_subaggregations(response)
25
+ end
26
+
27
+ protected
28
+
29
+ def result_key
30
+ 'doc_count'
31
+ end
32
+
33
+ end
34
+ end
data/lib/elasticated/aggregations/range_aggregation.rb CHANGED
@@ -2,10 +2,12 @@ module Elasticated
2
2
  class RangeAggregation < Aggregation
3
3
  include Subaggregated
4
4
 
5
- attr_accessor :_conditions
5
+ attr_accessor :_conditions, :compact, :include_count
6
6
 
7
7
  def initialize(field, *args, &block)
8
8
  super
9
+ self.compact = extra_params.delete(:compact) { false }
10
+ self.include_count = extra_params.delete(:include_count) { true }
9
11
  initialize_subaggregations RangeAggregationEvaluator.new, &block
10
12
  end
11
13
 
@@ -24,10 +26,17 @@ module Elasticated
24
26
  end
25
27
 
26
28
  def parse(response)
27
- response['buckets'].inject({}) do |hash, (key_name, values)|
28
- range_body = { 'count' => values['doc_count'] }
29
- range_body.merge! parse_subaggregations(values)
30
- hash.merge key_name => range_body
29
+ response['buckets'].each_with_object({}) do |(key_name, values), hash|
30
+ count = values['doc_count']
31
+
32
+ hash[key_name] =
33
+ if _subaggregations.empty?
34
+ compact ? count : { 'count' => count }
35
+ else
36
+ parse_subaggregations(values).tap do |h|
37
+ h['count'] = count if include_count
38
+ end
39
+ end
31
40
  end
32
41
  end
33
42
 
data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb ADDED
@@ -0,0 +1,86 @@
1
+ module Elasticated
2
+ class SafeDateHistogramAggregation < HistogramAggregation
3
+ include Subaggregated
4
+
5
+ DEFAULT_INTERVAL = '1d'
6
+
7
+ attr_accessor :offset, :time_zone, :points, :compact, :include_count
8
+
9
+ def initialize(field, opts={}, &block)
10
+ self.offset = opts.delete(:offset)
11
+ self.time_zone = opts.delete(:time_zone)
12
+ self.points = opts.fetch(:points)
13
+ opts.delete(:points)
14
+ self.compact = opts.delete(:compact) { false }
15
+ self.include_count = opts.delete(:include_count) { true }
16
+ interval = opts.delete(:interval) || DEFAULT_INTERVAL
17
+ super field, interval, opts, &block
18
+ end
19
+
20
+ def build
21
+ terms = { field: field, interval: safe_interval.to_s }
22
+ if offset # '1.4 style'
23
+ terms.merge! pre_offset: offset
24
+ terms.merge! post_offset: offset
25
+ end
26
+ if time_zone
27
+ terms.merge! time_zone: time_zone
28
+ end
29
+ terms.merge! extra_params
30
+ aggregation_struct = { date_histogram: terms }
31
+ aggregation_struct.merge! build_subaggregations
32
+ aggregation_struct
33
+ end
34
+
35
+ def parse(response)
36
+ response['buckets'].each_with_object({}) do |bucket, hash|
37
+ count = bucket['doc_count']
38
+ key = build_key(bucket)
39
+
40
+ if _subaggregations.empty?
41
+ hash[key] ||= 0
42
+ if compact
43
+ hash[key] = hash[key] + count
44
+ else
45
+ hash[key]['count'] ||= 0
46
+ hash[key]['count'] = hash[key]['count'] + count
47
+ end
48
+ else
49
+ parsed_subaggregations = parse_subaggregations(bucket)
50
+ hash[key] = value_for(parsed_subaggregations, hash, key).tap do |h|
51
+ h['count'] = count if include_count
52
+ end
53
+ end
54
+ end
55
+ end
56
+
57
+ protected
58
+
59
+ def build_key(bucket)
60
+ time = Timing::TimeInZone.at bucket['key'] / 1000, time_zone
61
+
62
+ if parsed_interval == safe_interval
63
+ time.iso8601
64
+ else
65
+ points.select { |p| p <= time.iso8601 }.sort.reverse.first
66
+ end
67
+ end
68
+
69
+ def safe_interval
70
+ parsed_interval > Timing::Interval.days(1) ? Timing::Interval.days(1) : parsed_interval
71
+ end
72
+
73
+ def parsed_interval
74
+ Timing::Interval.parse interval
75
+ end
76
+
77
+ def value_for(aggregation_value, hash, key)
78
+ if aggregation_value.is_a? Hash
79
+ Helpers.hash_sum(hash.fetch(key, {}), aggregation_value)
80
+ else
81
+ hash.fetch(key, 0) + aggregation_value
82
+ end
83
+ end
84
+
85
+ end
86
+ end