elasticated 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +35 -0
  3. data/Gemfile +4 -0
  4. data/README.md +3 -0
  5. data/Rakefile +6 -0
  6. data/elasticated.gemspec +29 -0
  7. data/lib/elasticated.rb +102 -0
  8. data/lib/elasticated/aggregation.rb +36 -0
  9. data/lib/elasticated/aggregations/cardinality_aggregation.rb +15 -0
  10. data/lib/elasticated/aggregations/count_aggregation.rb +15 -0
  11. data/lib/elasticated/aggregations/count_distinct_aggregation.rb +15 -0
  12. data/lib/elasticated/aggregations/count_filtered_aggregation.rb +29 -0
  13. data/lib/elasticated/aggregations/custom_aggregation.rb +25 -0
  14. data/lib/elasticated/aggregations/date_histogram_aggregation.rb +35 -0
  15. data/lib/elasticated/aggregations/filter_aggregation.rb +33 -0
  16. data/lib/elasticated/aggregations/filter_aggregation_evaluator.rb +22 -0
  17. data/lib/elasticated/aggregations/group_aggregation.rb +29 -0
  18. data/lib/elasticated/aggregations/histogram_aggregation.rb +34 -0
  19. data/lib/elasticated/aggregations/nested_aggregation.rb +30 -0
  20. data/lib/elasticated/aggregations/range_aggregation.rb +35 -0
  21. data/lib/elasticated/aggregations/range_aggregation_evaluator.rb +22 -0
  22. data/lib/elasticated/aggregations/ranges_builder.rb +35 -0
  23. data/lib/elasticated/aggregations/single_value_aggregation.rb +47 -0
  24. data/lib/elasticated/aggregations/subaggregated.rb +27 -0
  25. data/lib/elasticated/aggregations/sum_distinct_aggregation.rb +20 -0
  26. data/lib/elasticated/aggregations/terms_aggregation.rb +63 -0
  27. data/lib/elasticated/aggregations/top_hits_aggregation.rb +25 -0
  28. data/lib/elasticated/block_evaluation.rb +15 -0
  29. data/lib/elasticated/boolean_clause.rb +43 -0
  30. data/lib/elasticated/client.rb +84 -0
  31. data/lib/elasticated/clonable.rb +58 -0
  32. data/lib/elasticated/conditions/custom_condition.rb +19 -0
  33. data/lib/elasticated/conditions/exists_condition.rb +11 -0
  34. data/lib/elasticated/conditions/missing_condition.rb +11 -0
  35. data/lib/elasticated/conditions/nested_condition.rb +19 -0
  36. data/lib/elasticated/conditions/range_condition.rb +27 -0
  37. data/lib/elasticated/conditions/script_condition.rb +22 -0
  38. data/lib/elasticated/conditions/standard_condition.rb +26 -0
  39. data/lib/elasticated/conditions/terms_condition.rb +22 -0
  40. data/lib/elasticated/conditions/wildcard_condition.rb +18 -0
  41. data/lib/elasticated/conditions_builder.rb +75 -0
  42. data/lib/elasticated/configurable.rb +9 -0
  43. data/lib/elasticated/configuration.rb +9 -0
  44. data/lib/elasticated/default_logger.rb +27 -0
  45. data/lib/elasticated/delimiters/date_field_delimiter.rb +33 -0
  46. data/lib/elasticated/delimiters/standard_field_delimiter.rb +33 -0
  47. data/lib/elasticated/delimiters/term_field_delimiter.rb +24 -0
  48. data/lib/elasticated/document.rb +46 -0
  49. data/lib/elasticated/helpers.rb +28 -0
  50. data/lib/elasticated/index_selector.rb +44 -0
  51. data/lib/elasticated/inspectionable.rb +9 -0
  52. data/lib/elasticated/mapping.rb +19 -0
  53. data/lib/elasticated/mapping/builder.rb +36 -0
  54. data/lib/elasticated/mapping/fields_builder.rb +148 -0
  55. data/lib/elasticated/mapping/nested_builder.rb +15 -0
  56. data/lib/elasticated/mapping/object_builder.rb +15 -0
  57. data/lib/elasticated/mapping/partial.rb +11 -0
  58. data/lib/elasticated/mapping/type_builder.rb +14 -0
  59. data/lib/elasticated/partitioned_repository.rb +27 -0
  60. data/lib/elasticated/query.rb +159 -0
  61. data/lib/elasticated/query_aggregations.rb +71 -0
  62. data/lib/elasticated/query_conditions.rb +89 -0
  63. data/lib/elasticated/repositories/monthly_partitioned_repository.rb +96 -0
  64. data/lib/elasticated/repository.rb +139 -0
  65. data/lib/elasticated/results.rb +43 -0
  66. data/lib/version.rb +92 -0
  67. data/spec/aggregation_spec.rb +587 -0
  68. data/spec/date_field_delimiter_spec.rb +67 -0
  69. data/spec/document_spec.rb +44 -0
  70. data/spec/elasticsearch_hit_1.json +14 -0
  71. data/spec/elasticsearch_response_1.json +29 -0
  72. data/spec/elasticsearch_response_2.json +44 -0
  73. data/spec/elasticsearch_top_hits_response.json +20 -0
  74. data/spec/integration_spec.rb +184 -0
  75. data/spec/mapping_spec.rb +219 -0
  76. data/spec/monthly_partitioned_repository_spec.rb +99 -0
  77. data/spec/query_aggregations_spec.rb +44 -0
  78. data/spec/query_conditions_spec.rb +314 -0
  79. data/spec/query_spec.rb +265 -0
  80. data/spec/results_spec.rb +69 -0
  81. data/spec/spec_helper.rb +2 -0
  82. data/spec/term_field_delimiter_spec.rb +39 -0
  83. metadata +225 -0
@@ -0,0 +1,71 @@
1
module Elasticated
  # Ordered collection of aggregation objects attached to a query.
  #
  # Aggregations are added through dynamically dispatched methods: calling
  # `foo_bar(*args, &block)` looks up `::Elasticated::FooBarAggregation`
  # (name produced by Helpers.string_to_camel_case) and, when that constant
  # exists, instantiates it and registers the instance.
  class QueryAggregations

    include Clonable
    include BlockEvaluation

    # Array holding the registered aggregation objects, in insertion order.
    attr_accessor :_aggregations

    def initialize
      self._aggregations = []
    end

    # Builds a hash mapping each aggregation's `name` to its own `build` output.
    def build
      _aggregations.each_with_object({}) do |aggregation, built|
        built[aggregation.name] = aggregation.build
      end
    end

    # Parses a response hash: for every aggregation, the entry stored under
    # its `name` (as a string) is handed to the aggregation's `parse`, and the
    # outcome is stored under the aggregation's `original_name` (as a string).
    def parse(response)
      _aggregations.each_with_object({}) do |aggregation, parsed|
        response_key = aggregation.name.to_s
        result_key = aggregation.original_name.to_s
        parsed[result_key] = aggregation.parse(response[response_key])
      end
    end

    # Registers every aggregation of another QueryAggregations into this one.
    def merge!(other_query_aggs)
      other_query_aggs._aggregations.each do |incoming|
        add_aggregation incoming
      end
    end

    def empty?
      _aggregations.empty?
    end

    private

    # Resolves the aggregation class for a DSL method name, or nil when the
    # constant lookup fails for any StandardError.
    def get_aggregation_class(agg_name)
      class_prefix = Helpers.string_to_camel_case(agg_name.to_s)
      begin
        self.class.const_get("::Elasticated::#{class_prefix}Aggregation")
      rescue StandardError
        nil
      end
    end

    # Turns unknown method calls into aggregations when a matching class
    # exists; otherwise falls through to the default behaviour.
    def method_missing(method_name, *args, &block)
      agg_class = get_aggregation_class(method_name)
      return super unless agg_class

      add_aggregation agg_class.new(*args, &block)
    end

    def respond_to_missing?(name, include_private=false)
      get_aggregation_class(name) || super
    end

    # Adds the aggregation unless an equal one (per `==`) is already present.
    # When an equal one exists and the new aggregation is Subaggregated, the
    # new subaggregations are merged into the existing aggregation instead.
    def add_aggregation(aggregation)
      existing = _aggregations.find { |candidate| candidate == aggregation }
      if existing
        existing._subaggregations.merge!(aggregation._subaggregations) if aggregation.is_a?(Subaggregated)
      else
        _aggregations << aggregation
        aggregation
      end
    end

  end
end
@@ -0,0 +1,89 @@
1
module Elasticated
  # Boolean container of query conditions, split into three BooleanClause
  # groups (must / must_not / should), with optional `_cache` and
  # `minimum_should_match` settings.
  class QueryConditions

    include Clonable

    attr_accessor :_must, :_must_not, :_should, :_minimum_should_match, :_cache

    def initialize
      self._must = BooleanClause.new
      self._must_not = BooleanClause.new
      self._should = BooleanClause.new
    end

    # delimiters

    # Delegates to the 'must' clause only.
    def fill_delimiter(field_delimiter)
      _must.fill_delimiter(field_delimiter)
    end

    # conditions

    # Conditions added directly land in the 'must' clause.
    def add(condition)
      _must.add(condition)
    end

    # NOTE(review): presumably the builder methods route through #add - confirm in ConditionsBuilder.
    include ConditionsBuilder

    # Evaluates the block against the 'must' clause.
    def must(&block)
      _must.evaluate(block)
    end

    # Evaluates the block against the 'must_not' clause.
    def must_not(&block)
      _must_not.evaluate(block)
    end

    # Evaluates the block against the 'should' clause.
    def should(&block)
      _should.evaluate(block)
    end

    # 'must_not' conditions

    def not_equal(*args)
      _must_not.equal(*args)
    end

    def without(*args)
      _must_not.with(*args)
    end

    def not_between(*args)
      _must_not.between(*args)
    end

    # cache

    def cache(value=true)
      self._cache = value
    end

    # other methods

    def minimum_should_match(value)
      self._minimum_should_match = value
    end

    # Builds the query hash:
    # * no conditions at all        -> { match_all: {} }
    # * exactly one 'must' and nothing else -> that condition alone
    # * anything else               -> { bool: { ... } } holding only the
    #   non-empty clauses; minimum_should_match is emitted only together with
    #   'should', and _cache only when set.
    def build
      return { match_all: {} } if empty?
      return _must.build_first if _must_not.empty? && _should.empty? && _must.count == 1

      bool = {}
      bool[:must] = _must.build unless _must.empty?
      bool[:must_not] = _must_not.build unless _must_not.empty?
      unless _should.empty?
        bool[:should] = _should.build
        bool[:minimum_should_match] = _minimum_should_match if _minimum_should_match
      end
      bool[:_cache] = _cache if _cache
      { bool: bool }
    end

    # True when none of the three clauses holds a condition.
    def empty?
      _must.empty? && _must_not.empty? && _should.empty?
    end

  end
end
@@ -0,0 +1,96 @@
1
module Elasticated
  # Repository that writes each document to a per-month index.
  #
  # Index names and aliases are derived as "<prefix>-YYYY-MM" from the
  # document's date field. Queries are always executed against the general
  # alias (see #index_alias_for_query).
  class MonthlyPartitionedRepository < Repository
    include Configurable

    attr_accessor :_index_name, :_index_alias, :_general_alias
    attr_accessor :date_field
    attr_accessor :dynamic_creation, :index_options

    # Recognised options (besides whatever Repository#initialize consumes):
    #   :index_name       - required, prefix for monthly index names
    #   :index_alias      - required, prefix for monthly aliases
    #   :general_alias    - alias used for queries (defaults to :index_alias)
    #   :date_field       - document field holding the date (defaults to :date)
    #   :index_options    - hash with mapping, shards info, etc or nil
    #   :dynamic_creation - create missing monthly indices on write (defaults to false)
    # Raises when :index_name or :index_alias is missing.
    def initialize(opts={})
      super(opts)
      self._index_name = opts[:index_name] || raise('An index prefix should be specified')
      self._index_alias = opts[:index_alias] || raise('An index alias prefix should be specified')
      self._general_alias = opts[:general_alias] || _index_alias
      self.date_field = opts[:date_field] || :date
      self.index_options = opts[:index_options]
      self.dynamic_creation = opts[:dynamic_creation] || false
    end

    # Runs a read action, forcing the :index option to the alias resolved for the query.
    def execute(action, query, opts={})
      target_alias = index_alias_for(query)
      super(action, query, opts.merge(index: target_alias))
      # rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
      # action == :count ? 0 : nil
    end

    # Runs a write action against the monthly alias matching the document's
    # date, making sure the index exists first. Raises when the document has
    # no value in its date field.
    def prepare(action, document, opts={})
      date = date_from(document)
      raise("The document has not a valid '#{date_field}' field") unless date
      check_or_create_index!(date)
      super(action, document, opts.merge(index: index_alias_for(date)))
    end

    # Reads the date field from the document source and parses it with
    # Date.parse; returns the raw falsy value when the field is absent.
    def date_from(document)
      raw_date = document.source[date_field.to_s]
      raw_date && Date.parse(raw_date)
    end

    # Ensures the monthly alias for the date exists. When it does not, the
    # index and its aliases are created if dynamic creation is enabled;
    # otherwise an error is raised.
    def check_or_create_index!(date)
      monthly_alias = index_alias_for(date)
      return if client.index_exists?(monthly_alias)
      unless dynamic_creation
        raise("Index '#{monthly_alias}' not found (dynamic per-month index creation is disabled)")
      end
      monthly_index = index_name_for(date)
      create_index(monthly_index)
      create_alias(monthly_index, monthly_alias)
    end

    # Creates the monthly index for the given date.
    def create_index!(date)
      create_index(index_name_for(date))
    end

    # Creates the monthly (and general) alias for the given date.
    def create_alias!(date)
      create_alias(index_name_for(date), index_alias_for(date))
    end

    # For a Query this returns the query alias; for anything else the object
    # is treated as a date and the monthly index name is returned.
    def index_name_for(object)
      if object.is_a?(Query)
        index_alias_for_query(object)
      else
        index_name_for_date(object)
      end
    end

    # For a Query this returns the query alias; for anything else the object
    # is treated as a date and the monthly alias is returned.
    def index_alias_for(object)
      if object.is_a?(Query)
        index_alias_for_query(object)
      else
        index_alias_for_date(object)
      end
    end

    protected

    # Creates the index with the configured index options and logs it.
    # NOTE(review): `log` is not defined here - presumably provided by Configurable; confirm.
    def create_index(index_name)
      client.create_index(index_name, index_options)
      log.info "Index #{index_name} created"
    end

    # Points both the monthly alias and the general alias at the index.
    def create_alias(index_name, index_alias)
      client.create_alias(index_name, index_alias)
      client.create_alias(index_name, _general_alias)
      log.info "Alias #{index_alias} created over index #{index_name}"
    end

    # The query itself is currently ignored; the general alias is always used.
    def index_alias_for_query(query)
      _general_alias # TODO
    end

    def index_alias_for_date(date)
      generate_name(_index_alias, date)
    end

    def index_name_for_date(date)
      generate_name(_index_name, date)
    end

    # Formats "<prefix>-YYYY-MM" for the given date.
    def generate_name(prefix, date)
      "#{prefix}-#{date.strftime('%Y-%m')}"
    end

  end
end
@@ -0,0 +1,139 @@
1
module Elasticated
  # Base repository: translates high-level read/write requests into calls on
  # a Client instance.
  #
  # Read requests go through #execute and write requests through #prepare;
  # both dispatch to the matching protected `_exec_<action>` method.
  class Repository

    # child can implement 'execute(action, query, opts)'
    # child can implement 'prepare(action, document, opts)'

    # Page size used for from/size pagination.
    PAGE_SIZE = 50
    # Keep-alive interval sent with scan & scroll requests.
    SCROLL_INTERVAL = '3m'.freeze
    # `size` option sent with the initial scan request.
    SCROLL_BATCH_SIZE = 1000

    attr_accessor :client

    # opts are handed untouched to Client.new.
    def initialize(opts={})
      self.client = Client.new opts
    end

    def execute_search(query, opts={})
      execute :search, query, opts
    end

    def execute_aggregations(query, opts={})
      execute :aggregations, query, opts
    end

    def execute_count(query, opts={})
      execute :count, query, opts
    end

    def execute_aggregated_search(query, opts={})
      execute :aggregated_search, query, opts
    end

    def delete_by(query, opts={})
      execute :delete, query, opts
    end

    # True when the count for the query is greater than zero.
    def exists?(query, opts={})
      execute_count(query, opts) > 0
    end

    def index_document(document, opts={})
      prepare :index, document, opts
    end

    def update_document(document, opts={})
      prepare :update, document, opts
    end

    protected

    # write actions

    def _exec_index(document, opts={})
      _exec_upsert :index, document, opts
    end

    def _exec_update(document, opts={})
      _exec_upsert :update, document, opts
    end

    # Sends the document source to `client.<method>_document`.
    # The id, type and index come from the document unless opts already
    # provides type/index; the resolved index and type are written back to
    # the document. Works on a copy of opts so the caller's hash is never
    # modified (a reused hash would otherwise carry the previous document's
    # id/type/index into the next call).
    def _exec_upsert(method, document, opts={})
      opts = opts.dup
      opts[:id] = document.id if document.id
      opts[:type] = document.type unless opts[:type]
      opts[:index] = document.index unless opts[:index]
      document.index = opts[:index]
      document.type = opts[:type]
      client.send "#{method}_document", document.source, opts
    end

    # read actions

    def _exec_aggregations(query, opts={})
      body = query.build_for_aggregations
      response = client.search body, opts
      query.parse_aggregations response['aggregations']
    end

    def _exec_count(query, opts={})
      body = query.build_for_count
      response = client.count body, opts
      response['count']
    end

    def _exec_search(query, opts={})
      _exec_paginated_search query, false, opts
    end

    def _exec_aggregated_search(query, opts={})
      _exec_paginated_search query, true, opts
    end

    def _exec_delete(query, opts={})
      client.delete query, opts
    end

    # Runs a search and gathers every matching document into one Results:
    # * limited queries        -> a single request
    # * sorted or aggregated   -> from/size pagination
    # * everything else        -> scan & scroll
    def _exec_paginated_search(query, aggregated, opts={})
      body = aggregated ? query.build_for_aggregated_search : query.build_for_search
      if query.limited?
        response = client.search body, opts
        Results.from_elasticsearch_response response, query
      elsif query.sorted? || query.aggregated?
        _paginate_with_from_size query, body, aggregated, opts
      else
        _paginate_with_scroll query, body, opts
      end
    end

    # Normal pagination: requests pages of PAGE_SIZE until the page count
    # derived from the response's hits total is reached.
    def _paginate_with_from_size(query, body, aggregated, opts)
      # Declared outside the block on purpose: a variable first assigned
      # inside `loop do` would be block-local, i.e. reset on every iteration
      # and undefined once the loop ends.
      results = nil
      current_page = 1
      loop do
        offset = PAGE_SIZE * (current_page - 1)
        response = client.search body, opts.merge(size: PAGE_SIZE, from: offset)
        results = _collect_results results, response, query
        total_pages = (response['hits']['total'] / PAGE_SIZE.to_f).ceil
        break if current_page >= total_pages
        current_page += 1
        # pages after the first one are requested with the plain search body
        body = query.build_for_search if aggregated
      end
      results
    end

    # Scan & scroll: issues the initial scan request, then keeps scrolling
    # until a response comes back without hits.
    def _paginate_with_scroll(query, body, opts)
      results = nil
      response = client.search body, opts.merge(search_type: 'scan', scroll: SCROLL_INTERVAL, size: SCROLL_BATCH_SIZE)
      while (response = client.scroll(response['_scroll_id'], scroll: SCROLL_INTERVAL)) && !response['hits']['hits'].empty?
        results = _collect_results results, response, query
      end
      # nothing was scrolled: build the Results from the last response
      results || Results.from_elasticsearch_response(response, query)
    end

    # Starts a Results from the response, or appends the response's hits to
    # an existing one. Always returns the Results object itself, so callers
    # do not depend on what Results#append_results_from returns.
    def _collect_results(results, response, query)
      return Results.from_elasticsearch_response(response, query) unless results
      results.append_results_from response
      results
    end

    # abstract methods

    def execute(action, query, opts={})
      # child's implementation here
      send "_exec_#{action}", query, opts
    end

    def prepare(action, document, opts={})
      # child's implementation here
      send "_exec_#{action}", document, opts
    end

  end
end
@@ -0,0 +1,43 @@
1
module Elasticated

  # Shard counters copied from the response's '_shards' section.
  ShardsInfo = Struct.new :total, :successful, :failed
  # Hit counters copied from the response's 'hits' section.
  HitsInfo = Struct.new :total, :max_score

  # Array of documents built from a search response, together with the
  # response metadata (timing, shards, hits) and parsed aggregations.
  class Results < Array

    attr_accessor :took, :timed_out
    attr_accessor :shards # methods: total, successful, failed
    attr_accessor :hits # methods: total, max_score
    attr_accessor :documents, :aggregations

    # Builds a Results from a raw response hash.
    #
    # elasticsearch_response - hash with 'hits', '_shards', 'took',
    #                          'timed_out' and optionally 'aggregations'
    # query                  - optional object responding to
    #                          parse_aggregations; only used when the
    #                          response carries aggregations
    def self.from_elasticsearch_response(elasticsearch_response, query=nil)
      hits = elasticsearch_response['hits']
      documents = hits['hits'].map { |hit| Document.from_elasticsearch_hit hit }
      results = new documents
      results.documents = documents
      # cluster metadata
      results.took = elasticsearch_response['took']
      results.timed_out = elasticsearch_response['timed_out']
      # shards metadata
      shards = elasticsearch_response['_shards']
      results.shards = ShardsInfo.new shards['total'], shards['successful'], shards['failed']
      # search metadata
      results.hits = HitsInfo.new hits['total'], hits['max_score']
      # aggregations results
      aggregations = elasticsearch_response['aggregations']
      results.aggregations = query.parse_aggregations aggregations if query && aggregations
      results
    end

    # Appends the hits of another response page.
    #
    # Each new document is added both to `documents` and to the array
    # itself, so iterating the Results and reading `documents` keep yielding
    # the same elements. Returns self, which lets callers write
    # `results = results.append_results_from(response)`.
    def append_results_from(elasticsearch_response)
      elasticsearch_response['hits']['hits'].each do |hit|
        document = Document.from_elasticsearch_hit hit
        documents.push document
        push document
      end
      self
    end

    # Sources of all documents, in order.
    def sources
      documents.map(&:source)
    end

  end
end
@@ -0,0 +1,92 @@
1
module Elasticated
  # Gem version. Frozen so the shared constant cannot be mutated in place.
  VERSION = '1.0.0'.freeze
end

# Changelog

# 1.0.0
# Changes to Repository
# InsightsRepository and its subclasses were removed
# Changes to MonthlyPartitionedRepository
# Queries can now return results and aggregations in a single request
# Added the Document class
# Added the Results class
# SocialContentRepository and ConversationsRepository were removed
# The Terms Filter now accepts optional parameters (execution and cache)
# The Nested Filter now accepts the optional cache parameter
# The cache parameter can now be set for the Bool Filter
# The Range Filter now accepts optional parameters (execution and cache)
# post_filter is now supported in the query
# Nested bools are now supported

# 0.8.2
# A TermsAggregation can now be ordered by multiple criteria

# 0.8.1
# DateHistogramAggregation can now receive post_zone and time_zone

# 0.8.0
# Added a conversations repository

# 0.7.2
# Added the ads_actions and ads_post_engagement metrics to FacebookInsightsRepository

# 0.7.1
# Added the sub_type field to the post mapping of FacebookInsightsRepository

# 0.7.0
# Added YoutubeInsightsRepository
# Added InstagramInsightsRepository
# Added an option to configure all repositories at once
# Added the greater_than, greater_equal, less_than and lower_or_equals conditions
# Added RangeAggregation

# 0.6.0
# Added the business_ads field to the FacebookInsightsRepository mapping
# Added the marketing api fields to the FacebookInsightsRepository types
# Added the 'embedded_media_views' and 'video_views' metrics to TwitterInsightsRepository

# 0.5.2
# The results of an AggregatedQuery can now be left "uncompacted"
# Critical fix to the '==' method of FilterAggregation
# Added the 'video_retention' type metrics to FacebookInsightsRepository
# Added the :offset option to DateHistogramAggregation

# 0.5.1
# Critical fix to FacebookInsightsRepository#update
# Updated the keepcon_utils dependency to version 0.3

# 0.5.0
# Added missing video metrics to the FacebookInsightsRepository mapping
# Critical fix to the '==' method of QueryConditions
# Added MappingBuilder
# Added the Client class, a wrapper around Elasticsearch::Client
# Removed Repository's dependency on Elasticsearch::Client; it now goes through Client
# TermsAggregation is now built with size 0 by default
# AbstractQuery#over_repository was removed
# The 'include_id' option of repository.execute_query was renamed to 'metadata'

# 0.4.1
# Numeric values of InsightsRepository are now of type long

# 0.4.0
# Added the update_document method to InsightsRepository
# Added 'calc' keys alongside the pre-existing 'gen' keys in InsightsRepository
# Third-level key-value pairs are now handled in InsightsRepository#prepare,restore
# repository.execute_query now accepts the 'include_id' option

# 0.3.0
# Added the create_test_index method to SocialContentRepository

# 0.2.0
# Added NestedAggregation
# Aggregations that support subaggregations are now "auto-merged"
# Added the 'nested' filter
# Added the 'min' aggregation
# Added 'page_fans_country' to the fb insights mapping
# Added 'page_fans_gender_age' to the fb insights mapping
# The 'create_index_for' and 'put_alias_for' methods of 'facebook_insights_repository' are now public
# Added HistogramAggregation
# Added MonthlyPartitionedRepository
# Added InsightsRepository
# Added InsightsMappingGenerator