elasticated 1.2.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +123 -1
  3. data/elasticated.gemspec +1 -0
  4. data/lib/elasticated.rb +18 -3
  5. data/lib/elasticated/aggregations/filter_aggregation.rb +13 -6
  6. data/lib/elasticated/aggregations/geohash_grid_aggregation.rb +28 -0
  7. data/lib/elasticated/aggregations/group_aggregation.rb +16 -7
  8. data/lib/elasticated/aggregations/missing_aggregation.rb +34 -0
  9. data/lib/elasticated/aggregations/range_aggregation.rb +14 -5
  10. data/lib/elasticated/aggregations/safe_date_histogram_aggregation.rb +86 -0
  11. data/lib/elasticated/aggregations/single_value_aggregation.rb +7 -1
  12. data/lib/elasticated/aggregations/stats_aggregation.rb +13 -0
  13. data/lib/elasticated/aggregations/subaggregated.rb +10 -1
  14. data/lib/elasticated/aggregations/top_hits_aggregation.rb +2 -2
  15. data/lib/elasticated/aggregations/{count_distinct_aggregation.rb → value_count_aggregation.rb} +1 -1
  16. data/lib/elasticated/configurable.rb +13 -2
  17. data/lib/elasticated/configuration.rb +6 -0
  18. data/lib/elasticated/document.rb +2 -1
  19. data/lib/elasticated/helpers.rb +18 -0
  20. data/lib/elasticated/loggers/default_logger.rb +27 -0
  21. data/lib/elasticated/loggers/silent_logger.rb +27 -0
  22. data/lib/elasticated/query.rb +8 -0
  23. data/lib/elasticated/query_aggregations.rb +3 -4
  24. data/lib/elasticated/repository.rb +31 -30
  25. data/lib/elasticated/repository/intelligent_search.rb +46 -0
  26. data/lib/elasticated/repository/normal_search.rb +40 -0
  27. data/lib/elasticated/repository/resumable_search.rb +58 -0
  28. data/lib/elasticated/repository/scan_scroll_search.rb +43 -0
  29. data/lib/elasticated/repository/scroll_search.rb +45 -0
  30. data/lib/elasticated/repository/search.rb +45 -0
  31. data/lib/elasticated/repository/single_page_search.rb +13 -0
  32. data/lib/elasticated/results.rb +43 -25
  33. data/lib/version.rb +11 -1
  34. data/spec/aggregation_spec.rb +58 -32
  35. data/spec/document_spec.rb +4 -4
  36. data/spec/intelligent_search_spec.rb +88 -0
  37. data/spec/query_spec.rb +2 -2
  38. data/spec/results_spec.rb +9 -9
  39. metadata +38 -5
  40. data/lib/elasticated/aggregations/count_aggregation.rb +0 -15
  41. data/lib/elasticated/default_logger.rb +0 -27
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 38febd11375f1593d887dfa64c008e6b559e4b77
4
- data.tar.gz: 6cfaa6521ae4d55a7ed9ca37cc415acc33c02cf8
3
+ metadata.gz: 7c6d361a271487eaaa00bad98fc637d71577f84f
4
+ data.tar.gz: a1ad71c40b64d1681fe08558eaf0a6b7d19e49e2
5
5
  SHA512:
6
- metadata.gz: 4c0663916c7c1b3137c42065d5e51cc2dee51d4b70e53b2ae4ce4a8f199be8830c4fc4eea8da1e05b4409230b32baed127fcbbde692958351fea2cf4af9a0000
7
- data.tar.gz: 853799f2868ad8a545ee100558195d421c9c591f6fd7e71a1a0d5c1698b313edb5f4513e62f6600a9c06d0eec211efb29c05e48063ab0683ab91cadbf423b030
6
+ metadata.gz: c3c712d491f5846c8445765b5532fc0c3de738a429be5d00e1bbd6c11f1812e711eaaaa6facf0edad4a6563570aa243b15e748652ebbc82d0aebd3d1d329eacc
7
+ data.tar.gz: 40d195a39ecea6dc083487956a8442b69f7cf27e5947274a22f55c5fce64640f2708a53e16a3acd77fade4bb2e32cd38d4cff420e953d200293d28b5156cb524
data/README.md CHANGED
@@ -1,3 +1,125 @@
1
- # elasticated
1
+ # Elasticated
2
2
 
3
3
  Elasticsearch Wrapper, with Query & Mapping Builders
4
+
5
+ **HOW TO** instantiate a repository
6
+ ```ruby
7
+ repository = Elasticated::Repository.new # pointing to localhost
8
+ repository = Elasticated::Repository.new host: 'http://user:pass@192.168.1.2:9200' # pointing to some secured server
9
+ repository = Elasticated::Repository.new host: 'myhost', index: 'myindex', type: 'mytype' # pointing to some type of some index
10
+ ```
11
+
12
+ **HOW TO** build a query
13
+ ```ruby
14
+ query = Query.build do
15
+ filter do
16
+ equal :first_name, 'Pablo'
17
+ end
18
+ conditions do
19
+ between :age, 20, 25
20
+ must_not do
21
+ wildcard :second_name, 'Santiago*'
22
+ end
23
+ should do
24
+ gt :age, 23
25
+ lt :age, 22
26
+ end
27
+ minimum_should_match 1
28
+ end
29
+ post do
30
+ equal :city, 'CABA'
31
+ end
32
+ aggregations do
33
+ group :register_number, size: 10
34
+ end
35
+ from 5
36
+ size 15
37
+ end
38
+ query.build # see the result
39
+ ```
40
+
41
+ **HOW TO** execute a search
42
+ ```ruby
43
+ repository = Repository.new
44
+ query = Query.new
45
+ repository.execute_count query
46
+ repository.execute_search query
47
+ repository.execute_aggregations query
48
+ repository.execute_aggregated_search query
49
+ repository.delete_by query
50
+ repository.exists? query
51
+ ```
52
+
53
+ **HOW TO** build a document, index or update it
54
+ ```ruby
55
+ document = Document.create do |doc|
56
+ doc.id = 'my_unique_id'
57
+ doc.index = 'myindex'
58
+ doc.type = 'mytype'
59
+ doc.source = { user: 'Pablo', some_field: 'some_value' }
60
+ end
61
+ repository = Repository.new
62
+ repository.index_document document
63
+ repository.update_document document
64
+ ```
65
+
66
+ **HOW TO** start a *resumable* scroll
67
+ ```ruby
68
+ repository = Elasticated::Repository.new
69
+ query = Elasticated::Query.new
70
+ # prepare the 'search' object
71
+ search = repository.prepare_search query, index: 'my_index', type: 'my_type'
72
+ # fetch the first page of results
73
+ results = search.start
74
+ # get the scroll_id, and use it again later
75
+ scroll_id = search.scroll_id # also results.scroll_id is valid
76
+ ```
77
+
78
+ **HOW TO** resume a scroll
79
+ ```ruby
80
+ repository = Elasticated::Repository.new
81
+ scroll_id = '...' # the scroll_id returned by the 'search' object
82
+ # prepare the 'search' object
83
+ search = repository.restore_search scroll_id
84
+ # fetch the next page of results
85
+ results = search.fetch
86
+ # fetch all pages until the search ends
87
+ results.append search.fetch until search.completed?
88
+ ```
89
+
90
+ **HOW TO** build a mapping
91
+ ```ruby
92
+ mapping = Elasticated::Mapping.build do
93
+ type :content do
94
+ date :date
95
+ string :user
96
+ analyzed_string :user_alias
97
+ nested :user_purchases do
98
+ long :purchase_id
99
+ string :items
100
+ end
101
+ object :user_info do
102
+ string :address
103
+ end
104
+ end
105
+ end
106
+ ```
107
+
108
+ **HOW TO** configure the gem
109
+ ```ruby
110
+ Elasticated.configure do |config|
111
+ config.logger = Elasticated::Loggers::DefaultLogger.new
112
+ config.scroll_expiration_time = '3m'
113
+ config.scroll_page_size = 500
114
+ config.search_page_size = 1000
115
+ end
116
+ ```
117
+
118
+ **HOW TO** configure some specific repository
119
+ ```ruby
120
+ repository = Repository.new
121
+ repository.logger = Elasticated::Loggers::DefaultLogger.new
122
+ repository.scroll_expiration_time = '3m'
123
+ repository.scroll_page_size = 500
124
+ repository.search_page_size = 1000
125
+ ```
@@ -25,5 +25,6 @@ Gem::Specification.new do |spec|
25
25
 
26
26
  spec.add_runtime_dependency 'elasticsearch'
27
27
  spec.add_runtime_dependency 'hash_ext', '~> 0.1.1'
28
+ spec.add_runtime_dependency 'timing', '~> 0.0', '>= 0.0.9'
28
29
 
29
30
  end
@@ -1,6 +1,10 @@
1
1
  require 'json'
2
2
  require 'hash_ext'
3
3
  require 'elasticsearch'
4
+ require 'timing'
5
+
6
+ require_relative 'elasticated/loggers/silent_logger'
7
+ require_relative 'elasticated/loggers/default_logger'
4
8
 
5
9
  require_relative 'elasticated/helpers'
6
10
  require_relative 'elasticated/block_evaluation'
@@ -8,7 +12,6 @@ require_relative 'elasticated/clonable'
8
12
  require_relative 'elasticated/inspectionable'
9
13
  require_relative 'elasticated/configuration'
10
14
  require_relative 'elasticated/configurable'
11
- require_relative 'elasticated/default_logger'
12
15
 
13
16
  # query conditions
14
17
 
@@ -48,15 +51,20 @@ require_relative 'elasticated/aggregations/terms_aggregation'
48
51
 
49
52
  require_relative 'elasticated/aggregations/histogram_aggregation'
50
53
  require_relative 'elasticated/aggregations/date_histogram_aggregation'
54
+ require_relative 'elasticated/aggregations/safe_date_histogram_aggregation'
51
55
 
52
56
  require_relative 'elasticated/aggregations/single_value_aggregation'
53
57
  require_relative 'elasticated/aggregations/cardinality_aggregation'
54
- require_relative 'elasticated/aggregations/count_distinct_aggregation'
58
+ require_relative 'elasticated/aggregations/value_count_aggregation'
59
+ require_relative 'elasticated/aggregations/missing_aggregation'
60
+
61
+ require_relative 'elasticated/aggregations/stats_aggregation'
55
62
 
56
63
  require_relative 'elasticated/aggregations/group_aggregation'
57
- require_relative 'elasticated/aggregations/count_aggregation'
58
64
  require_relative 'elasticated/aggregations/sum_distinct_aggregation'
59
65
 
66
+ require_relative 'elasticated/aggregations/geohash_grid_aggregation'
67
+
60
68
  require_relative 'elasticated/aggregations/count_filtered_aggregation'
61
69
  require_relative 'elasticated/aggregations/filter_aggregation_evaluator'
62
70
  require_relative 'elasticated/aggregations/filter_aggregation'
@@ -78,6 +86,13 @@ require_relative 'elasticated/results'
78
86
 
79
87
  require_relative 'elasticated/client'
80
88
  require_relative 'elasticated/repository'
89
+ require_relative 'elasticated/repository/search'
90
+ require_relative 'elasticated/repository/intelligent_search'
91
+ require_relative 'elasticated/repository/single_page_search'
92
+ require_relative 'elasticated/repository/normal_search'
93
+ require_relative 'elasticated/repository/scroll_search'
94
+ require_relative 'elasticated/repository/scan_scroll_search'
95
+ require_relative 'elasticated/repository/resumable_search'
81
96
 
82
97
  require_relative 'elasticated/mapping'
83
98
  require_relative 'elasticated/mapping/partial'
@@ -2,11 +2,13 @@ module Elasticated
2
2
  class FilterAggregation < Aggregation
3
3
  include Subaggregated
4
4
 
5
- attr_accessor :_evaluator, :_filter_name
5
+ attr_accessor :_evaluator, :_filter_name, :compact, :include_count
6
6
 
7
7
  def initialize(filter_name, *args, &block)
8
- self._filter_name = filter_name
8
+ self._filter_name = filter_name
9
9
  super
10
+ self.compact = extra_params.delete(:compact) { false }
11
+ self.include_count = extra_params.delete(:include_count) { true }
10
12
  initialize_subaggregations FilterAggregationEvaluator.new, &block
11
13
  end
12
14
 
@@ -23,10 +25,15 @@ module Elasticated
23
25
  end
24
26
 
25
27
  def parse(response)
26
- response_body = response
27
- ret = { 'count' => response_body['doc_count'] }
28
- ret.merge! parse_subaggregations(response_body)
29
- ret
28
+ count = response['doc_count']
29
+
30
+ if _subaggregations.empty?
31
+ compact ? count : { 'count' => count }
32
+ else
33
+ parse_subaggregations(response).tap do |h|
34
+ h['count'] = count if include_count
35
+ end
36
+ end
30
37
  end
31
38
 
32
39
  end
@@ -0,0 +1,28 @@
1
module Elasticated
  # Aggregation that issues the `geohash_grid` operation on a single field
  # and reports the document count of every returned bucket.
  class GeohashGridAggregation < Aggregation

    # Default name: the field prefixed with `geohash_grid_by_`.
    def default_name
      "geohash_grid_by_#{field}"
    end

    # Request fragment: the operation name mapped to the field plus any
    # extra params (extra params win when a key collides).
    # TODO: this is exactly the same as in SingleValueAggregation
    def build
      { operation => { field: field }.merge(extra_params) }
    end

    # Flattens the response buckets into a { bucket key => doc_count } hash.
    def parse(response)
      counts_by_key = {}
      response['buckets'].each do |entry|
        counts_by_key[entry['key']] = entry['doc_count']
      end
      counts_by_key
    end

    protected

    # Operation name used as the key of the built request fragment.
    def operation
      :geohash_grid
    end

  end
end
@@ -2,8 +2,12 @@ module Elasticated
2
2
  class GroupAggregation < TermsAggregation
3
3
  include Subaggregated
4
4
 
5
+ attr_accessor :compact, :include_count
6
+
5
7
  def initialize(field, *args, &block)
6
8
  super
9
+ self.compact = extra_params.delete(:compact) { false }
10
+ self.include_count = extra_params.delete(:include_count) { true }
7
11
  initialize_subaggregations &block
8
12
  end
9
13
 
@@ -12,16 +16,21 @@ module Elasticated
12
16
  end
13
17
 
14
18
  def build
15
- aggregation_struct = super
16
- aggregation_struct.merge! build_subaggregations
17
- aggregation_struct
19
+ super.merge build_subaggregations
18
20
  end
19
21
 
20
22
  def parse(response)
21
- response['buckets'].inject({}) do |hash, bucket|
22
- bucket_hash = { 'count' => bucket['doc_count'] }
23
- bucket_hash.merge! parse_subaggregations(bucket)
24
- hash.merge bucket['key'] => bucket_hash
23
+ response['buckets'].each_with_object({}) do |bucket, hash|
24
+ count = bucket['doc_count']
25
+
26
+ hash[bucket['key']] =
27
+ if _subaggregations.empty?
28
+ compact ? count : { 'count' => count }
29
+ else
30
+ parse_subaggregations(bucket).tap do |h|
31
+ h['count'] = count if include_count
32
+ end
33
+ end
25
34
  end
26
35
  end
27
36
 
@@ -0,0 +1,34 @@
1
module Elasticated
  # Aggregation for the `missing` operation. It reads its value from the
  # response's 'doc_count' and may carry nested subaggregations.
  class MissingAggregation < SingleValueAggregation
    include Subaggregated

    attr_accessor :compact

    # Removes the :compact flag (default false) from extra_params after the
    # parent initializer has run, then registers the subaggregations block.
    def initialize(field, *args, &block)
      super
      self.compact = extra_params.delete(:compact) { false }
      initialize_subaggregations(&block)
    end

    # implementation
    def operation
      :missing
    end

    # Parent request fragment merged with the subaggregations fragment.
    # TODO: this is exactly the same as in GroupAggregation
    def build
      own_struct = super
      own_struct.merge(build_subaggregations)
    end

    # Without subaggregations the parent parser is used; otherwise only the
    # parsed subaggregations are returned.
    def parse(response)
      if _subaggregations.empty?
        super
      else
        parse_subaggregations(response)
      end
    end

    protected

    # Response key holding this aggregation's value.
    def result_key
      'doc_count'
    end

  end
end
@@ -2,10 +2,12 @@ module Elasticated
2
2
  class RangeAggregation < Aggregation
3
3
  include Subaggregated
4
4
 
5
- attr_accessor :_conditions
5
+ attr_accessor :_conditions, :compact, :include_count
6
6
 
7
7
  def initialize(field, *args, &block)
8
8
  super
9
+ self.compact = extra_params.delete(:compact) { false }
10
+ self.include_count = extra_params.delete(:include_count) { true }
9
11
  initialize_subaggregations RangeAggregationEvaluator.new, &block
10
12
  end
11
13
 
@@ -24,10 +26,17 @@ module Elasticated
24
26
  end
25
27
 
26
28
  def parse(response)
27
- response['buckets'].inject({}) do |hash, (key_name, values)|
28
- range_body = { 'count' => values['doc_count'] }
29
- range_body.merge! parse_subaggregations(values)
30
- hash.merge key_name => range_body
29
+ response['buckets'].each_with_object({}) do |(key_name, values), hash|
30
+ count = values['doc_count']
31
+
32
+ hash[key_name] =
33
+ if _subaggregations.empty?
34
+ compact ? count : { 'count' => count }
35
+ else
36
+ parse_subaggregations(values).tap do |h|
37
+ h['count'] = count if include_count
38
+ end
39
+ end
31
40
  end
32
41
  end
33
42
 
@@ -0,0 +1,86 @@
1
module Elasticated
  # Date histogram that never requests an interval wider than one day.
  #
  # When the requested interval is longer than a day, the aggregation is
  # built with a one-day interval and the returned buckets are regrouped on
  # the client: each bucket is filed under the greatest entry of `points`
  # that is not after the bucket's time.
  # NOTE(review): `points` is compared against ISO-8601 strings, so it is
  # presumably a list of ISO-8601 strings in the same time zone - confirm.
  class SafeDateHistogramAggregation < HistogramAggregation
    include Subaggregated

    DEFAULT_INTERVAL = '1d'

    attr_accessor :offset, :time_zone, :points, :compact, :include_count

    # field - field the histogram is computed on.
    # opts  - options hash. Recognised keys:
    #         :points        (required) grouping boundaries, see class note
    #         :interval      requested interval, defaults to DEFAULT_INTERVAL
    #         :offset        sent as both pre_offset and post_offset
    #         :time_zone     sent as time_zone and used to render keys
    #         :compact       default false
    #         :include_count default true
    #         Remaining keys are forwarded to the parent initializer.
    #
    # Raises KeyError when :points is missing.
    def initialize(field, opts={}, &block)
      # Work on a copy so the caller's hash can be reused for another
      # aggregation without losing :points, :interval, etc.
      opts = opts.dup
      self.offset = opts.delete(:offset)
      self.time_zone = opts.delete(:time_zone)
      self.points = opts.fetch(:points)
      opts.delete(:points)
      self.compact = opts.delete(:compact) { false }
      self.include_count = opts.delete(:include_count) { true }
      interval = opts.delete(:interval) || DEFAULT_INTERVAL
      super field, interval, opts, &block
    end

    # Builds the `date_histogram` request fragment using the capped interval,
    # merged with the subaggregations fragment.
    def build
      terms = { field: field, interval: safe_interval.to_s }
      if offset # '1.4 style'
        terms[:pre_offset] = offset
        terms[:post_offset] = offset
      end
      terms[:time_zone] = time_zone if time_zone
      terms.merge! extra_params
      { date_histogram: terms }.merge(build_subaggregations)
    end

    # Turns the response buckets into a hash keyed by build_key.
    #
    # Several buckets may map to the same key, so every value is accumulated:
    # - no subaggregations, compact:     key => summed doc_count
    # - no subaggregations, not compact: key => { 'count' => summed doc_count }
    # - with subaggregations:            key => summed subaggregation values,
    #   plus 'count' => summed doc_count when include_count is set
    def parse(response)
      response['buckets'].each_with_object({}) do |bucket, hash|
        count = bucket['doc_count']
        key = build_key(bucket)

        if _subaggregations.empty?
          if compact
            hash[key] = hash.fetch(key, 0) + count
          else
            # Seed with a hash, not 0: an Integer cannot hold a 'count' entry.
            entry = (hash[key] ||= { 'count' => 0 })
            entry['count'] += count
          end
        else
          # Read the running count before merging so it is accumulated
          # rather than replaced by the current bucket's count.
          previous = hash[key]
          previous_count = previous.is_a?(Hash) ? previous.fetch('count', 0) : 0
          merged = value_for(parse_subaggregations(bucket), hash, key)
          merged['count'] = previous_count + count if include_count
          hash[key] = merged
        end
      end
    end

    protected

    # Key under which a bucket is stored: the bucket time as ISO-8601 when
    # the requested interval was not capped, otherwise the greatest point not
    # after the bucket time (nil when no such point exists).
    # NOTE(review): bucket['key'] is divided by 1000, so it is presumably
    # epoch milliseconds - confirm against the Elasticsearch response.
    def build_key(bucket)
      time = Timing::TimeInZone.at bucket['key'] / 1000, time_zone
      timestamp = time.iso8601
      return timestamp if parsed_interval == safe_interval

      points.select { |point| point <= timestamp }.max
    end

    # Requested interval, capped at one day.
    def safe_interval
      one_day = Timing::Interval.days(1)
      requested = parsed_interval
      requested > one_day ? one_day : requested
    end

    # Requested interval parsed by Timing::Interval.
    def parsed_interval
      Timing::Interval.parse interval
    end

    # Adds a parsed subaggregation value to whatever is already stored under
    # key: hashes are combined with Helpers.hash_sum, anything else with `+`.
    def value_for(aggregation_value, hash, key)
      if aggregation_value.is_a? Hash
        Helpers.hash_sum(hash.fetch(key, {}), aggregation_value)
      else
        hash.fetch(key, 0) + aggregation_value
      end
    end

  end
end