elasticated 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +35 -0
  3. data/Gemfile +4 -0
  4. data/README.md +3 -0
  5. data/Rakefile +6 -0
  6. data/elasticated.gemspec +29 -0
  7. data/lib/elasticated.rb +102 -0
  8. data/lib/elasticated/aggregation.rb +36 -0
  9. data/lib/elasticated/aggregations/cardinality_aggregation.rb +15 -0
  10. data/lib/elasticated/aggregations/count_aggregation.rb +15 -0
  11. data/lib/elasticated/aggregations/count_distinct_aggregation.rb +15 -0
  12. data/lib/elasticated/aggregations/count_filtered_aggregation.rb +29 -0
  13. data/lib/elasticated/aggregations/custom_aggregation.rb +25 -0
  14. data/lib/elasticated/aggregations/date_histogram_aggregation.rb +35 -0
  15. data/lib/elasticated/aggregations/filter_aggregation.rb +33 -0
  16. data/lib/elasticated/aggregations/filter_aggregation_evaluator.rb +22 -0
  17. data/lib/elasticated/aggregations/group_aggregation.rb +29 -0
  18. data/lib/elasticated/aggregations/histogram_aggregation.rb +34 -0
  19. data/lib/elasticated/aggregations/nested_aggregation.rb +30 -0
  20. data/lib/elasticated/aggregations/range_aggregation.rb +35 -0
  21. data/lib/elasticated/aggregations/range_aggregation_evaluator.rb +22 -0
  22. data/lib/elasticated/aggregations/ranges_builder.rb +35 -0
  23. data/lib/elasticated/aggregations/single_value_aggregation.rb +47 -0
  24. data/lib/elasticated/aggregations/subaggregated.rb +27 -0
  25. data/lib/elasticated/aggregations/sum_distinct_aggregation.rb +20 -0
  26. data/lib/elasticated/aggregations/terms_aggregation.rb +63 -0
  27. data/lib/elasticated/aggregations/top_hits_aggregation.rb +25 -0
  28. data/lib/elasticated/block_evaluation.rb +15 -0
  29. data/lib/elasticated/boolean_clause.rb +43 -0
  30. data/lib/elasticated/client.rb +84 -0
  31. data/lib/elasticated/clonable.rb +58 -0
  32. data/lib/elasticated/conditions/custom_condition.rb +19 -0
  33. data/lib/elasticated/conditions/exists_condition.rb +11 -0
  34. data/lib/elasticated/conditions/missing_condition.rb +11 -0
  35. data/lib/elasticated/conditions/nested_condition.rb +19 -0
  36. data/lib/elasticated/conditions/range_condition.rb +27 -0
  37. data/lib/elasticated/conditions/script_condition.rb +22 -0
  38. data/lib/elasticated/conditions/standard_condition.rb +26 -0
  39. data/lib/elasticated/conditions/terms_condition.rb +22 -0
  40. data/lib/elasticated/conditions/wildcard_condition.rb +18 -0
  41. data/lib/elasticated/conditions_builder.rb +75 -0
  42. data/lib/elasticated/configurable.rb +9 -0
  43. data/lib/elasticated/configuration.rb +9 -0
  44. data/lib/elasticated/default_logger.rb +27 -0
  45. data/lib/elasticated/delimiters/date_field_delimiter.rb +33 -0
  46. data/lib/elasticated/delimiters/standard_field_delimiter.rb +33 -0
  47. data/lib/elasticated/delimiters/term_field_delimiter.rb +24 -0
  48. data/lib/elasticated/document.rb +46 -0
  49. data/lib/elasticated/helpers.rb +28 -0
  50. data/lib/elasticated/index_selector.rb +44 -0
  51. data/lib/elasticated/inspectionable.rb +9 -0
  52. data/lib/elasticated/mapping.rb +19 -0
  53. data/lib/elasticated/mapping/builder.rb +36 -0
  54. data/lib/elasticated/mapping/fields_builder.rb +148 -0
  55. data/lib/elasticated/mapping/nested_builder.rb +15 -0
  56. data/lib/elasticated/mapping/object_builder.rb +15 -0
  57. data/lib/elasticated/mapping/partial.rb +11 -0
  58. data/lib/elasticated/mapping/type_builder.rb +14 -0
  59. data/lib/elasticated/partitioned_repository.rb +27 -0
  60. data/lib/elasticated/query.rb +159 -0
  61. data/lib/elasticated/query_aggregations.rb +71 -0
  62. data/lib/elasticated/query_conditions.rb +89 -0
  63. data/lib/elasticated/repositories/monthly_partitioned_repository.rb +96 -0
  64. data/lib/elasticated/repository.rb +139 -0
  65. data/lib/elasticated/results.rb +43 -0
  66. data/lib/version.rb +92 -0
  67. data/spec/aggregation_spec.rb +587 -0
  68. data/spec/date_field_delimiter_spec.rb +67 -0
  69. data/spec/document_spec.rb +44 -0
  70. data/spec/elasticsearch_hit_1.json +14 -0
  71. data/spec/elasticsearch_response_1.json +29 -0
  72. data/spec/elasticsearch_response_2.json +44 -0
  73. data/spec/elasticsearch_top_hits_response.json +20 -0
  74. data/spec/integration_spec.rb +184 -0
  75. data/spec/mapping_spec.rb +219 -0
  76. data/spec/monthly_partitioned_repository_spec.rb +99 -0
  77. data/spec/query_aggregations_spec.rb +44 -0
  78. data/spec/query_conditions_spec.rb +314 -0
  79. data/spec/query_spec.rb +265 -0
  80. data/spec/results_spec.rb +69 -0
  81. data/spec/spec_helper.rb +2 -0
  82. data/spec/term_field_delimiter_spec.rb +39 -0
  83. metadata +225 -0
@@ -0,0 +1,71 @@
1
module Elasticated
  # Ordered, duplicate-free list of the aggregations attached to a query.
  #
  # Aggregations are added through a dynamic DSL: calling `foo_bar(*args, &block)`
  # on an instance looks up the constant `Elasticated::FooBarAggregation` and,
  # when it exists, instantiates it with the given arguments and stores it.
  # `Clonable`, `BlockEvaluation`, `Helpers` and `Subaggregated` are defined
  # elsewhere in the gem.
  class QueryAggregations

    include Clonable
    include BlockEvaluation

    attr_accessor :_aggregations

    def initialize
      self._aggregations = []
    end

    # Builds the request body for every stored aggregation.
    #
    # @return [Hash] aggregation name => result of that aggregation's #build
    def build
      _aggregations.each_with_object({}) do |aggregation, body|
        body[aggregation.name] = aggregation.build
      end
    end

    # Parses the aggregations section of a response.
    #
    # Each aggregation is looked up in the response by its (stringified) name
    # and reported in the result under its (stringified) original name.
    #
    # @param response [Hash] indexed by aggregation name
    # @return [Hash] original aggregation name => parsed value
    def parse(response)
      _aggregations.each_with_object({}) do |aggregation, parsed|
        name = aggregation.name.to_s
        original_name = aggregation.original_name.to_s
        parsed[original_name] = aggregation.parse(response[name])
      end
    end

    # Adds every aggregation of another QueryAggregations to this one,
    # going through the same de-duplication as the DSL.
    def merge!(other_query_aggs)
      other_query_aggs._aggregations.each do |other_aggregation|
        add_aggregation other_aggregation
      end
    end

    def empty?
      _aggregations.empty?
    end

    private

    # Resolves the aggregation class for a DSL method name.
    #
    # Only NameError is rescued: that is what const_get raises both for a
    # missing constant and for a name that is not a valid constant (e.g. a
    # method name ending in '?' or '='). Other failures are no longer hidden.
    #
    # @return [Class, nil] the aggregation class, or nil when none matches
    def get_aggregation_class(agg_name)
      camel_case_name = Helpers.string_to_camel_case agg_name.to_s
      self.class.const_get("::Elasticated::#{camel_case_name}Aggregation")
    rescue NameError
      nil
    end

    # Turns `some_name(*args, &block)` into a new SomeNameAggregation that is
    # then stored; unknown names fall through to the default behavior.
    def method_missing(method_name, *args, &block)
      agg_class = get_aggregation_class method_name
      return super unless agg_class
      add_aggregation agg_class.new(*args, &block)
    end

    def respond_to_missing?(name, include_private=false)
      get_aggregation_class(name) ? true : super
    end

    # Stores an aggregation unless an equal one (per `==`) is already present.
    # In that case, when the incoming aggregation is a Subaggregated, its
    # sub-aggregations are merged into the stored one.
    #
    # @return [Object] the aggregation that ends up stored: the new one, or the
    #   pre-existing equivalent
    def add_aggregation(aggregation)
      equivalent_agg = _aggregations.find { |previous_agg| previous_agg == aggregation }
      unless equivalent_agg
        _aggregations << aggregation
        return aggregation
      end
      if aggregation.is_a? Subaggregated
        equivalent_agg._subaggregations.merge! aggregation._subaggregations
      end
      equivalent_agg
    end

  end
end
@@ -0,0 +1,89 @@
1
module Elasticated
  # Holds the three boolean clauses of a query (must / must_not / should) and
  # renders them as a query body. `Clonable`, `ConditionsBuilder` and
  # `BooleanClause` are defined elsewhere in the gem.
  class QueryConditions

    include Clonable

    attr_accessor :_must, :_must_not, :_should, :_minimum_should_match, :_cache

    def initialize
      self._must = BooleanClause.new
      self._must_not = BooleanClause.new
      self._should = BooleanClause.new
    end

    # --- delimiters ---

    # Forwards the delimiter to the 'must' clause only.
    def fill_delimiter(field_delimiter)
      _must.fill_delimiter(field_delimiter)
    end

    # --- conditions ---

    # A bare condition is always registered in the 'must' clause.
    def add(condition)
      _must.add(condition)
    end

    include ConditionsBuilder

    # Evaluates the given block against the 'must' clause.
    def must(&block)
      _must.evaluate(block)
    end

    # Evaluates the given block against the 'must_not' clause.
    def must_not(&block)
      _must_not.evaluate(block)
    end

    # Evaluates the given block against the 'should' clause.
    def should(&block)
      _should.evaluate(block)
    end

    # --- negated shortcuts: each one delegates to the 'must_not' clause ---

    def not_equal(*args)
      _must_not.equal(*args)
    end

    def without(*args)
      _must_not.with(*args)
    end

    def not_between(*args)
      _must_not.between(*args)
    end

    # --- cache ---

    # Stores the cache flag; it is emitted by #build only inside a bool body.
    def cache(value=true)
      self._cache = value
    end

    # --- other methods ---

    def minimum_should_match(value)
      self._minimum_should_match = value
    end

    # Renders the conditions.
    #
    # @return [Hash] `{ match_all: {} }` when there are no conditions, the
    #   single built condition when only one 'must' condition exists, and a
    #   `{ bool: ... }` body otherwise
    def build
      return { match_all: {} } if empty?
      return _must.build_first if _must_not.empty? && _should.empty? && _must.count == 1

      clauses = {}
      clauses[:must] = _must.build unless _must.empty?
      clauses[:must_not] = _must_not.build unless _must_not.empty?
      unless _should.empty?
        clauses[:should] = _should.build
        # only meaningful (and only emitted) together with 'should' conditions
        clauses[:minimum_should_match] = _minimum_should_match if _minimum_should_match
      end
      clauses[:_cache] = _cache if _cache
      { bool: clauses }
    end

    # True when none of the three clauses holds a condition.
    def empty?
      [_must, _must_not, _should].all? { |clause| clause.empty? }
    end

  end
end
@@ -0,0 +1,96 @@
1
module Elasticated
  # Repository that writes each document to a per-month index, chosen from a
  # date field of the document, and reads through a general alias.
  #
  # Index and alias names have the form "<prefix>-YYYY-MM".
  #
  # Options (besides those consumed by Repository):
  #   :index_name       - prefix for the monthly index names (required)
  #   :index_alias      - prefix for the monthly alias names (required)
  #   :general_alias    - alias used for queries; defaults to :index_alias
  #   :date_field       - document field holding the date; defaults to :date
  #   :index_options    - passed to the client when creating an index (may be nil)
  #   :dynamic_creation - create missing monthly indices on write; defaults to false
  class MonthlyPartitionedRepository < Repository
    include Configurable

    attr_accessor :_index_name, :_index_alias, :_general_alias
    attr_accessor :date_field
    attr_accessor :dynamic_creation, :index_options

    def initialize(opts={})
      super opts
      self._index_name = opts[:index_name] || raise('An index prefix should be specified')
      self._index_alias = opts[:index_alias] || raise('An index alias prefix should be specified')
      self._general_alias = opts[:general_alias] || _index_alias
      self.date_field = opts[:date_field] || :date
      self.index_options = opts[:index_options] # hash with mapping, shards info, etc or nil
      self.dynamic_creation = opts[:dynamic_creation] || false
    end

    # Runs a read action against the alias selected for the query.
    def execute(action, query, opts={})
      index_alias = index_alias_for query
      super action, query, opts.merge(index: index_alias)
      # rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
      # action == :count ? 0 : nil
    end

    # Runs a write action against the monthly alias of the document's date,
    # making sure the backing index exists first.
    #
    # Raises RuntimeError when the document has no date or when the monthly
    # index is missing and dynamic creation is disabled.
    def prepare(action, document, opts={})
      date = date_from document
      raise("The document has not a valid '#{date_field}' field") unless date
      check_or_create_index! date
      super action, document, opts.merge(index: index_alias_for(date))
    end

    # Extracts the partitioning date from the document source.
    #
    # Values that already are date-like (Date, Time, ... anything responding
    # to #to_date) are converted directly instead of being handed to
    # Date.parse, which only accepts strings and would raise TypeError.
    #
    # @return [Date, nil] nil when the field is absent
    def date_from(document)
      value = document.source[date_field.to_s]
      return unless value
      return value.to_date if value.respond_to?(:to_date)
      Date.parse(value)
    end

    # Ensures the monthly alias for +date+ exists, creating index and aliases
    # when dynamic creation is enabled.
    def check_or_create_index!(date)
      index_alias = index_alias_for date
      return if client.index_exists? index_alias
      raise("Index '#{index_alias}' not found (dynamic per-month index creation is disabled)") unless dynamic_creation
      index_name = index_name_for date
      create_index index_name
      create_alias index_name, index_alias
    end

    def create_index!(date)
      create_index index_name_for(date)
    end

    def create_alias!(date)
      create_alias index_name_for(date), index_alias_for(date)
    end

    # NOTE(review): for a Query this returns the *alias*, not an index name,
    # exactly as the original did - confirm that is intended.
    def index_name_for(object)
      object.is_a?(Query) ? index_alias_for_query(object) : index_name_for_date(object)
    end

    def index_alias_for(object)
      object.is_a?(Query) ? index_alias_for_query(object) : index_alias_for_date(object)
    end

    protected

    def create_index(index_name)
      client.create_index index_name, index_options
      log.info "Index #{index_name} created"
    end

    # Registers both the monthly alias and the general alias over the index.
    def create_alias(index_name, index_alias)
      client.create_alias index_name, index_alias
      client.create_alias index_name, _general_alias
      log.info "Alias #{index_alias} created over index #{index_name}"
    end

    def index_alias_for_query(query)
      _general_alias # TODO
    end

    def index_alias_for_date(date)
      generate_name _index_alias, date
    end

    def index_name_for_date(date)
      generate_name _index_name, date
    end

    def generate_name(prefix, date)
      "#{prefix}-#{date.strftime('%Y-%m')}" # prefix-YYYY-MM
    end

  end
end
@@ -0,0 +1,139 @@
1
module Elasticated
  # Base repository: translates high-level read/write requests into calls on
  # a Client (defined elsewhere in the gem).
  #
  # child can implement 'execute(action, query, opts)'
  # child can implement 'prepare(action, document, opts)'
  class Repository

    # Page size used when paginating with from/size.
    PAGE_SIZE = 50
    # Keep-alive interval sent with every scan/scroll request.
    SCROLL_INTERVAL = '3m'
    # Size sent with the initial scan request.
    SCROLL_SIZE = 1000

    attr_accessor :client

    def initialize(opts={})
      self.client = Client.new opts
    end

    def execute_search(query, opts={})
      execute :search, query, opts
    end

    def execute_aggregations(query, opts={})
      execute :aggregations, query, opts
    end

    def execute_count(query, opts={})
      execute :count, query, opts
    end

    def execute_aggregated_search(query, opts={})
      execute :aggregated_search, query, opts
    end

    def delete_by(query, opts={})
      execute :delete, query, opts
    end

    def exists?(query, opts={})
      execute_count(query, opts) > 0
    end

    def index_document(document, opts={})
      prepare :index, document, opts
    end

    def update_document(document, opts={})
      prepare :update, document, opts
    end

    protected

    # write actions

    def _exec_index(document, opts={})
      _exec_upsert :index, document, opts
    end

    def _exec_update(document, opts={})
      _exec_upsert :update, document, opts
    end

    # Sends the document source through `client.<method>_document`.
    #
    # Missing :type / :index options are taken from the document, and the
    # document is updated with the values finally used. The caller's options
    # hash is copied first so it is never mutated.
    def _exec_upsert(method, document, opts={})
      opts = opts.dup
      opts[:id] = document.id if document.id
      opts[:type] = document.type unless opts[:type]
      opts[:index] = document.index unless opts[:index]
      document.index = opts[:index]
      document.type = opts[:type]
      client.send "#{method}_document", document.source, opts
    end

    # read actions

    def _exec_aggregations(query, opts={})
      body = query.build_for_aggregations
      response = client.search body, opts
      query.parse_aggregations response['aggregations']
    end

    def _exec_count(query, opts={})
      body = query.build_for_count
      response = client.count body, opts
      response['count']
    end

    def _exec_search(query, opts={})
      _exec_paginated_search query, false, opts
    end

    def _exec_aggregated_search(query, opts={})
      _exec_paginated_search query, true, opts
    end

    def _exec_delete(query, opts={})
      client.delete query, opts
    end

    # Runs a search and returns a Results with every matching document.
    #
    # * limited query               -> a single request
    # * sorted or aggregated query  -> from/size pagination
    # * anything else               -> scan & scroll
    def _exec_paginated_search(query, aggregated, opts={})
      body = aggregated ? query.build_for_aggregated_search : query.build_for_search
      if query.limited?
        response = client.search body, opts
        Results.from_elasticsearch_response response, query
      elsif query.sorted? || query.aggregated?
        _exec_offset_search query, body, aggregated, opts
      else
        _exec_scrolled_search query, body, opts
      end
    end

    # from/size pagination: fetches pages of PAGE_SIZE until the total
    # reported by the response has been covered.
    def _exec_offset_search(query, body, aggregated, opts)
      # Declared outside the block: a variable first assigned inside
      # `loop do ... end` is block-local and is lost between iterations.
      results = nil
      current_page = 1
      loop do
        offset = PAGE_SIZE * (current_page - 1)
        response = client.search body, opts.merge(size: PAGE_SIZE, from: offset)
        if results
          results.append_results_from response
        else
          results = Results.from_elasticsearch_response response, query
        end
        total_pages = (response['hits']['total'] / PAGE_SIZE.to_f).ceil
        break if current_page >= total_pages
        current_page += 1
        # aggregations were already returned with the first page
        body = query.build_for_search if aggregated
      end
      results
    end

    # scan & scroll: keeps scrolling until a page comes back without hits.
    def _exec_scrolled_search(query, body, opts)
      results = nil
      response = client.search body, opts.merge(search_type: 'scan', scroll: SCROLL_INTERVAL, size: SCROLL_SIZE)
      while (page = client.scroll(response['_scroll_id'], scroll: SCROLL_INTERVAL))
        response = page
        break if page['hits']['hits'].empty?
        # The return value of append_results_from is deliberately ignored so
        # that `results` always stays a Results instance.
        if results
          results.append_results_from page
        else
          results = Results.from_elasticsearch_response page, query
        end
      end
      results || Results.from_elasticsearch_response(response, query)
    end

    # abstract methods

    def execute(action, query, opts={})
      # child's implementation here
      send "_exec_#{action}", query, opts
    end

    def prepare(action, document, opts={})
      # child's implementation here
      send "_exec_#{action}", document, opts
    end

  end
end
@@ -0,0 +1,43 @@
1
module Elasticated

  ShardsInfo = Struct.new :total, :successful, :failed
  HitsInfo = Struct.new :total, :max_score

  # Array of documents returned by a search, plus the metadata of the
  # response they came from. `Document` is defined elsewhere in the gem.
  class Results < Array

    attr_accessor :took, :timed_out
    attr_accessor :shards # methods: total, successful, failed
    attr_accessor :hits # methods: total, max_score
    attr_accessor :documents, :aggregations

    # Builds a Results from a raw search response.
    #
    # @param elasticsearch_response [Hash] must contain 'hits' => { 'hits' => [...] };
    #   'took', 'timed_out', '_shards' and 'aggregations' are read when present
    # @param query [#parse_aggregations, nil] used to parse the 'aggregations'
    #   section; aggregations are left nil without it
    # @return [Results]
    def self.from_elasticsearch_response(elasticsearch_response, query=nil)
      hits = elasticsearch_response['hits']
      documents = hits['hits'].map { |hit| Document.from_elasticsearch_hit hit }
      results = new documents
      results.documents = documents
      # cluster metadata
      results.took = elasticsearch_response['took']
      results.timed_out = elasticsearch_response['timed_out']
      # shards metadata (tolerate responses that do not carry it)
      shards = elasticsearch_response['_shards'] || {}
      results.shards = ShardsInfo.new shards['total'], shards['successful'], shards['failed']
      # search metadata
      results.hits = HitsInfo.new hits['total'], hits['max_score']
      # aggregations results
      aggregations = elasticsearch_response['aggregations']
      results.aggregations = query.parse_aggregations aggregations if query && aggregations
      results
    end

    # Appends the hits of a further response page.
    #
    # `documents` is a separate array from the Results itself (Array.new
    # copies its argument), so each new document is added to both; otherwise
    # iterating the Results would only ever show the first page.
    #
    # @return [Results] self, so calls can be chained or reassigned
    def append_results_from(elasticsearch_response)
      elasticsearch_response['hits']['hits'].each do |hit|
        document = Document.from_elasticsearch_hit hit
        push document
        documents.push document unless documents.equal?(self)
      end
      self
    end

    # @return [Array] the source of every document
    def sources
      documents.map(&:source)
    end

  end
end
@@ -0,0 +1,92 @@
1
module Elasticated
  # Gem version. Frozen so the constant's string cannot be mutated in place.
  VERSION = '1.0.0'.freeze
end
4
+
5
+ # Changelog
6
+
7
+ # 1.0.0
8
+ # Cambios en Repository
9
+ # Se elimina InsightsRepository y sus hijos
10
+ # Cambios en MonthlyPartitionedRepository
11
+ # Ahora las queries pueden devolver resultados y aggregations en un solo pedido
12
+ # Se agrega la clase Document
13
+ # Se agrega la clase Results
14
+ # Se eliminan SocialContentRepository y ConversationsRepository
15
+ # El Terms Filter ahora acepta parametros opcionales (execution y cache)
16
+ # El Nested Filter ahora acepta el parametro opcional cache
17
+ # Ahora se puede setear el parametro cache para el Bool Filter
18
+ # El Range Filter ahora acepta parametros opcionales (execution y cache)
19
+ # Ahora se admite post_filter en la query
20
+ # Ahora se admiten bools anidados
21
+
22
+ # 0.8.2
23
+ # Ahora se puede ordenar por multiples criterios en una TermsAggregation
24
+
25
+ # 0.8.1
26
+ # Se agrego la posibilidad de recibir post_zone y time_zone al DateHistogramAggregation
27
+
28
+ # 0.8.0
29
+ # Se agrego un repositorio de conversaciones
30
+
31
+ # 0.7.2
32
+ # Se agregan las metricas ads_actions y ads_post_engagement a FacebookInsightsRepository
33
+
34
+ # 0.7.1
35
+ # Se agrega el campo sub_type al mapping de post de FacebookInsightsRepository
36
+
37
+ # 0.7.0
38
+ # Agregado YoutubeInsightsRepository
39
+ # Agregado InstagramInsightsRepository
40
+ # Agregada opcion para configurar todos los repositorios al mismo tiempo
41
+ # Agregadas las condiciones greater_than, greater_equal, less_than y lower_or_equals
42
+ # Agregada RangeAggregation
43
+
44
+ # 0.6.0
45
+ # Agregado el campo business_ads al mapping de FacebookInsightsRepository
46
+ # Agregados los campos de la marketing api a los tipos de FacebookInsightsRepository
47
+ # Agregadas las metricas 'embedded_media_views' y 'video_views' a TwitterInsightsRepository
48
+
49
+ # 0.5.2
50
+ # Ahora se puede "no compactar" los resultados de una AggregatedQuery
51
+ # Fix critico al metodo '==' de FilterAggregation
52
+ # Agregadas las metricas del tipo 'video_retention' a FacebookInsightsRepository
53
+ # Agregada la opcion :offset a DateHistogramAggregation
54
+
55
+ # 0.5.1
56
+ # Fix critico a FacebookInsightsRepository#update
57
+ # Actualizada la dependencia keepcon_utils a la version 0.3
58
+
59
+ # 0.5.0
60
+ # Agregadas metricas faltantes de video al mapping de FacebookInsightsRepository
61
+ # Fix critico al metodo '==' de QueryConditions
62
+ # Agregado MappingBuilder
63
+ # Agregada la clase Client, wrapper de Elasticsearch::Client
64
+ # Se elimina la dependencia de Repository hacia Elasticsearch::Client, ahora pasa por Client
65
+ # Ahora el TermsAggregation se construye con size 0 por default
66
+ # Se elimina AbstractQuery#over_repository
67
+ # La opcion 'include_id' de repository.execute_query pasa a llamarse 'metadata'
68
+
69
+ # 0.4.1
70
+ # Ahora los valores numericos de InsightsRepository son del tipo long
71
+
72
+ # 0.4.0
73
+ # Agregado metodo update_document en InsightsRepository
74
+ # Agregadas las key tipo 'calc' ademas de las preexistentes 'gen' a InsightsRepository
75
+ # Contemplados los pares key-value de tercer nivel en InsightsRepository#prepare,restore
76
+ # Ahora repository.execute_query acepta la opcion 'include_id'
77
+
78
+ # 0.3.0
79
+ # Agregado metodo create_test_index en SocialContentRepository
80
+
81
+ # 0.2.0
82
+ # Se agrega NestedAggregation
83
+ # Ahora las aggregations que admiten subaggregations se "auto-mergean"
84
+ # Agregado el filtro 'nested'
85
+ # Agregado 'min' aggregation
86
+ # Agregado 'page_fans_country' al mapping de fb insights
87
+ # Agregado 'page_fans_gender_age' al mapping de fb insights
88
+ # Los metodos 'create_index_for' y 'put_alias_for' de 'facebook_insights_repository' ahora son publicos
89
+ # Se agrega HistogramAggregation
90
+ # Se agrega MonthlyPartitionedRepository
91
+ # Se agrega InsightsRepository
92
+ # Se agrega InsightsMappingGenerator