elasticated 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +35 -0
  3. data/Gemfile +4 -0
  4. data/README.md +3 -0
  5. data/Rakefile +6 -0
  6. data/elasticated.gemspec +29 -0
  7. data/lib/elasticated.rb +102 -0
  8. data/lib/elasticated/aggregation.rb +36 -0
  9. data/lib/elasticated/aggregations/cardinality_aggregation.rb +15 -0
  10. data/lib/elasticated/aggregations/count_aggregation.rb +15 -0
  11. data/lib/elasticated/aggregations/count_distinct_aggregation.rb +15 -0
  12. data/lib/elasticated/aggregations/count_filtered_aggregation.rb +29 -0
  13. data/lib/elasticated/aggregations/custom_aggregation.rb +25 -0
  14. data/lib/elasticated/aggregations/date_histogram_aggregation.rb +35 -0
  15. data/lib/elasticated/aggregations/filter_aggregation.rb +33 -0
  16. data/lib/elasticated/aggregations/filter_aggregation_evaluator.rb +22 -0
  17. data/lib/elasticated/aggregations/group_aggregation.rb +29 -0
  18. data/lib/elasticated/aggregations/histogram_aggregation.rb +34 -0
  19. data/lib/elasticated/aggregations/nested_aggregation.rb +30 -0
  20. data/lib/elasticated/aggregations/range_aggregation.rb +35 -0
  21. data/lib/elasticated/aggregations/range_aggregation_evaluator.rb +22 -0
  22. data/lib/elasticated/aggregations/ranges_builder.rb +35 -0
  23. data/lib/elasticated/aggregations/single_value_aggregation.rb +47 -0
  24. data/lib/elasticated/aggregations/subaggregated.rb +27 -0
  25. data/lib/elasticated/aggregations/sum_distinct_aggregation.rb +20 -0
  26. data/lib/elasticated/aggregations/terms_aggregation.rb +63 -0
  27. data/lib/elasticated/aggregations/top_hits_aggregation.rb +25 -0
  28. data/lib/elasticated/block_evaluation.rb +15 -0
  29. data/lib/elasticated/boolean_clause.rb +43 -0
  30. data/lib/elasticated/client.rb +84 -0
  31. data/lib/elasticated/clonable.rb +58 -0
  32. data/lib/elasticated/conditions/custom_condition.rb +19 -0
  33. data/lib/elasticated/conditions/exists_condition.rb +11 -0
  34. data/lib/elasticated/conditions/missing_condition.rb +11 -0
  35. data/lib/elasticated/conditions/nested_condition.rb +19 -0
  36. data/lib/elasticated/conditions/range_condition.rb +27 -0
  37. data/lib/elasticated/conditions/script_condition.rb +22 -0
  38. data/lib/elasticated/conditions/standard_condition.rb +26 -0
  39. data/lib/elasticated/conditions/terms_condition.rb +22 -0
  40. data/lib/elasticated/conditions/wildcard_condition.rb +18 -0
  41. data/lib/elasticated/conditions_builder.rb +75 -0
  42. data/lib/elasticated/configurable.rb +9 -0
  43. data/lib/elasticated/configuration.rb +9 -0
  44. data/lib/elasticated/default_logger.rb +27 -0
  45. data/lib/elasticated/delimiters/date_field_delimiter.rb +33 -0
  46. data/lib/elasticated/delimiters/standard_field_delimiter.rb +33 -0
  47. data/lib/elasticated/delimiters/term_field_delimiter.rb +24 -0
  48. data/lib/elasticated/document.rb +46 -0
  49. data/lib/elasticated/helpers.rb +28 -0
  50. data/lib/elasticated/index_selector.rb +44 -0
  51. data/lib/elasticated/inspectionable.rb +9 -0
  52. data/lib/elasticated/mapping.rb +19 -0
  53. data/lib/elasticated/mapping/builder.rb +36 -0
  54. data/lib/elasticated/mapping/fields_builder.rb +148 -0
  55. data/lib/elasticated/mapping/nested_builder.rb +15 -0
  56. data/lib/elasticated/mapping/object_builder.rb +15 -0
  57. data/lib/elasticated/mapping/partial.rb +11 -0
  58. data/lib/elasticated/mapping/type_builder.rb +14 -0
  59. data/lib/elasticated/partitioned_repository.rb +27 -0
  60. data/lib/elasticated/query.rb +159 -0
  61. data/lib/elasticated/query_aggregations.rb +71 -0
  62. data/lib/elasticated/query_conditions.rb +89 -0
  63. data/lib/elasticated/repositories/monthly_partitioned_repository.rb +96 -0
  64. data/lib/elasticated/repository.rb +139 -0
  65. data/lib/elasticated/results.rb +43 -0
  66. data/lib/version.rb +92 -0
  67. data/spec/aggregation_spec.rb +587 -0
  68. data/spec/date_field_delimiter_spec.rb +67 -0
  69. data/spec/document_spec.rb +44 -0
  70. data/spec/elasticsearch_hit_1.json +14 -0
  71. data/spec/elasticsearch_response_1.json +29 -0
  72. data/spec/elasticsearch_response_2.json +44 -0
  73. data/spec/elasticsearch_top_hits_response.json +20 -0
  74. data/spec/integration_spec.rb +184 -0
  75. data/spec/mapping_spec.rb +219 -0
  76. data/spec/monthly_partitioned_repository_spec.rb +99 -0
  77. data/spec/query_aggregations_spec.rb +44 -0
  78. data/spec/query_conditions_spec.rb +314 -0
  79. data/spec/query_spec.rb +265 -0
  80. data/spec/results_spec.rb +69 -0
  81. data/spec/spec_helper.rb +2 -0
  82. data/spec/term_field_delimiter_spec.rb +39 -0
  83. metadata +225 -0
@@ -0,0 +1,67 @@
1
+ require_relative 'spec_helper'
2
+
3
module Elasticated
  module Delimiters
    # Specs for DateFieldDelimiter#build_strategy_params.
    #
    # The delimiter under test is constructed with `field: :created_at` and
    # `as: :date`; the examples pin the params it produces after terms and
    # minimum/maximum bounds have been registered for :created_at.
    describe DateFieldDelimiter do

      let(:delimiter) { DateFieldDelimiter.new(field: :created_at, as: :date) }

      it "should not delimit anything" do
        expect(delimiter.build_strategy_params).to be_empty
      end

      it "should delimit by one term" do
        delimiter.add_term(:created_at, '2016-07-19')

        expect(delimiter.build_strategy_params).to eq(date: '2016-07-19')
      end

      it "should raise when delimit by multiple terms" do
        %w[nombre1 nombre2].each { |term| delimiter.add_term(:created_at, term) }

        # NOTE(review): a bare `raise_error` matches any exception, including
        # unrelated ones; narrow it once the intended error class is confirmed.
        expect { delimiter.build_strategy_params }.to raise_error
      end

      it "should delimit by a min date" do
        delimiter.set_minimum(:created_at, '2016-07-19')

        expect(delimiter.build_strategy_params).to eq(date_since: '2016-07-19')
      end

      it "should delimit by a max date" do
        delimiter.set_maximum(:created_at, '2016-07-19')

        expect(delimiter.build_strategy_params).to eq(date_until: '2016-07-19')
      end

      it "should delimit by a range of dates" do
        delimiter.set_minimum(:created_at, '2016-07-19')
        delimiter.set_maximum(:created_at, '2016-07-21')

        expected_params = { date_since: '2016-07-19', date_until: '2016-07-21' }
        expect(delimiter.build_strategy_params).to eq(expected_params)
      end

      it "should delimit by the most restricted range of dates" do
        # Call order is kept exactly as in the original scenario.
        delimiter.set_minimum(:created_at, '2016-07-19')
        delimiter.set_maximum(:created_at, '2016-07-25')
        delimiter.set_minimum(:created_at, '2016-07-17') # ignored minimum
        delimiter.set_maximum(:created_at, '2016-07-24')
        delimiter.set_maximum(:created_at, '2016-07-26') # ignored maximum

        expected_params = { date_since: '2016-07-19', date_until: '2016-07-24' }
        expect(delimiter.build_strategy_params).to eq(expected_params)
      end

      it "should prefer a term over a range of dates" do
        delimiter.set_minimum(:created_at, '2016-07-19')
        delimiter.set_maximum(:created_at, '2016-07-21')
        delimiter.add_term(:created_at, '2016-07-20')

        expect(delimiter.build_strategy_params).to eq(date: '2016-07-20')
      end

    end
  end
end
@@ -0,0 +1,44 @@
1
+ require_relative 'spec_helper'
2
+
3
module Elasticated
  # Specs for Document.from_elasticsearch_hit, driven by the
  # `elasticsearch_hit_1.json` fixture stored next to this spec file.
  describe Document do

    # Loads and parses a JSON fixture.
    #
    # The path is resolved against this file's directory rather than the
    # process working directory, so the spec no longer depends on being
    # launched from the project root.
    #
    # @param name [String] fixture base name, without the `.json` extension
    # @return [Hash] the parsed JSON document
    def open_hit(name)
      JSON.parse File.read(File.join(__dir__, "#{name}.json"))
    end

    let(:hit) { open_hit 'elasticsearch_hit_1' }

    # Built once per example from the raw hit.
    let(:document) { Document.from_elasticsearch_hit hit }

    it "should parse an elasticsearch hit" do
      expect(document.id).to eq 'AU-CLCguwlaKln07OA4x'
      expect(document.index).to eq 'fbinsights-v6-2015-08'
      expect(document.type).to eq 'post'
      expect(document.score).to eq 7.6152167
      expect(document.source).not_to be_nil
      expect(document.source.keys.count).to eq 6
    end

    context "the document source" do

      it "can be accessed via methods" do
        expect(document.source.page_id).to eq '127735990588679'
      end

      it "can be accessed via key" do
        expect(document.source['page_id']).to eq '127735990588679'
      end

      it "can be accessed via indifferent key" do
        expect(document.source[:page_id]).to eq '127735990588679'
      end

    end

  end
end
@@ -0,0 +1,14 @@
1
+ {
2
+ "_index": "fbinsights-v6-2015-08",
3
+ "_type": "post",
4
+ "_id": "AU-CLCguwlaKln07OA4x",
5
+ "_score": 7.6152167,
6
+ "_source": {
7
+ "date": "2015-08-31",
8
+ "type": "post",
9
+ "element_id": "127735990588679_1074565052572430",
10
+ "social_content_id": "127735990588679_1074565052572430-QuilmesMoniFBPost-inbox-68",
11
+ "page_id": "127735990588679",
12
+ "sub_type": "link"
13
+ }
14
+ }
@@ -0,0 +1,29 @@
1
+ {
2
+ "took": 106,
3
+ "timed_out": false,
4
+ "_shards": {
5
+ "total": 3,
6
+ "successful": 3,
7
+ "failed": 0
8
+ },
9
+ "hits": {
10
+ "total": 1,
11
+ "max_score": 15.003607,
12
+ "hits": [
13
+ {
14
+ "_index": "fbinsights-v6-2015-08",
15
+ "_type": "post",
16
+ "_id": "AU-TsZrUyuAii-shqAtn",
17
+ "_score": 15.003607,
18
+ "_source": {
19
+ "element_id": "69004099657_10153641105209658",
20
+ "element_created_at": "2015-08-31T17:40:43.000Z",
21
+ "page_id": "69004099657",
22
+ "date": "2015-08-31",
23
+ "sub_type": "photo",
24
+ "type": "post"
25
+ }
26
+ }
27
+ ]
28
+ }
29
+ }
@@ -0,0 +1,44 @@
1
+ {
2
+ "took": 140,
3
+ "timed_out": false,
4
+ "_shards": {
5
+ "total": 3,
6
+ "successful": 3,
7
+ "failed": 0
8
+ },
9
+ "hits": {
10
+ "total": 340,
11
+ "max_score": 7.6152167,
12
+ "hits": [
13
+ {
14
+ "_index": "fbinsights-v6-2015-08",
15
+ "_type": "post",
16
+ "_id": "AU-CLCguwlaKln07OA4x",
17
+ "_score": 7.6152167,
18
+ "_source": {
19
+ "date": "2015-08-31",
20
+ "type": "post",
21
+ "element_id": "127735990588679_1074565052572430",
22
+ "social_content_id": "127735990588679_1074565052572430-QuilmesMoniFBPost-inbox-68",
23
+ "page_id": "127735990588679",
24
+ "sub_type": "link"
25
+ }
26
+ }
27
+ ]
28
+ },
29
+ "aggregations": {
30
+ "ads": {
31
+ "doc_count": 259,
32
+ "group_by_adgroup": {
33
+ "doc_count_error_upper_bound": 3,
34
+ "sum_other_doc_count": 360,
35
+ "buckets": [
36
+ {
37
+ "key": "6023316427914",
38
+ "doc_count": 1
39
+ }
40
+ ]
41
+ }
42
+ }
43
+ }
44
+ }
@@ -0,0 +1,20 @@
1
+ {
2
+ "hits": {
3
+ "total": 25365,
4
+ "max_score": 1,
5
+ "hits": [
6
+ {
7
+ "_index": "stack",
8
+ "_type": "question",
9
+ "_id": "602679",
10
+ "_score": 1,
11
+ "_source": {
12
+ "title": "Windows port opening"
13
+ },
14
+ "sort": [
15
+ 1370143231177
16
+ ]
17
+ }
18
+ ]
19
+ }
20
+ }
@@ -0,0 +1,184 @@
1
+ require_relative 'spec_helper'
2
+
3
module Elasticated
  # End-to-end specs for the Query DSL: each example builds a query through
  # Query.build and pins the hash returned by Query#build (and, in the last
  # example, the result of Query#parse_aggregations).
  describe "Integrations" do

    # Expected `query:` section shared by the examples that combine the
    # first_name/last_name conditions with the age filter.
    def expected_filtered_query
      {
        filtered: {
          filter: {
            terms: { age: [23] }
          },
          query: {
            bool: {
              must: [
                { terms: { first_name: [:pablo] } },
                { terms: { last_name: [:fernandez] } }
              ]
            }
          }
        }
      }
    end

    it "should build a complex query" do
      query = Query.build do
        conditions do
          equal :first_name, :pablo
          equal :last_name, :fernandez
        end
        filter { equal :age, 23 }
        aggregations do
          group(:materia) { max :nota }
        end
        size 20
        from 5
        sort :element_id
        source :nota
      end

      expected = {
        query: expected_filtered_query,
        aggs: {
          group_by_materia: {
            terms: { field: :materia, size: 0 },
            aggs: {
              max_nota: { max: { field: :nota } }
            }
          }
        },
        size: 20,
        from: 5,
        _source: [:nota],
        sort: [{ element_id: { order: :asc } }]
      }
      expect(query.build).to eq(expected)
    end

    it "should merge complex aggregations" do
      query = Query.build do
        aggregations do
          date_histogram(:date, interval: '7d') do
            group(:user) do
              count_filtered(:element_id) { equal :element_id, 2 }
            end
            nested('path') do
              group('path.metric.name', as: 'metric') do
                max 'path.metric.value', as: 'metric_value'
              end
            end
          end
        end
      end

      # A second aggregations call on the already-built query; the expectation
      # below contains the aggregations declared by both calls.
      query.aggregations do
        nested 'path'
        date_histogram(:date, interval: '7d') do
          nested('path') do
            group('path.metric.name', as: 'metric') do
              max 'path.metric.value', as: 'some_value'
            end
          end
        end
      end

      nested_path = { path: 'path' }
      max_metric_value = { max: { field: 'path.metric.value' } }

      expected = {
        query: { match_all: {} },
        aggs: {
          path: {
            nested: nested_path
          },
          group_by_date: {
            date_histogram: { field: :date, interval: '7d', format: 'yyyy-MM-dd' },
            aggs: {
              group_by_user: {
                terms: { field: :user, size: 0 },
                aggs: {
                  element_id: {
                    filter: { terms: { element_id: [2] } }
                  }
                }
              },
              path: {
                nested: nested_path,
                aggs: {
                  metric: {
                    terms: { field: 'path.metric.name', size: 0 },
                    aggs: {
                      metric_value: max_metric_value,
                      some_value: max_metric_value
                    }
                  }
                }
              }
            }
          }
        }
      }
      expect(query.build).to eq(expected)
    end

    it "should build queries and return results on aggregations including special chars" do
      query = Query.build do
        conditions do
          equal :first_name, :pablo
          equal :last_name, :fernandez
        end
        filter { equal :age, 23 }
        aggregations do
          # The options are passed as an explicit hash, as in the original call.
          group(:materia, { as: "materia <+> nota" })
        end
        size 20
        from 5
        sort :element_id
        source :nota
      end

      expected_body = {
        query: expected_filtered_query,
        aggs: {
          materia_____nota: {
            terms: { field: :materia, size: 0 }
          }
        },
        size: 20,
        from: 5,
        _source: [:nota],
        sort: [{ element_id: { order: :asc } }]
      }
      expect(query.build).to eq(expected_body)

      raw_aggregations = {
        "materia_____nota" => {
          "buckets" => [
            { "key" => "user1", "doc_count" => 2 }
          ]
        }
      }
      parsed_aggregations = {
        "materia <+> nota" => {
          "user1" => { "count" => 2 }
        }
      }
      expect(query.parse_aggregations(raw_aggregations)).to eq(parsed_aggregations)
    end

  end
end
@@ -0,0 +1,219 @@
1
+ require_relative 'spec_helper'
2
+
3
module Elasticated
  # Specs for the Mapping.build DSL: each example declares one or more types
  # and pins the hash returned by Mapping.build.
  describe Mapping do

    # Expected mapping of a field of the given type stored with doc values.
    #
    # @param type [Symbol] the field type (:integer, :long, :date, ...)
    # @return [Hash]
    def doc_values_field(type)
      { type: type, fielddata: { format: :doc_values } }
    end

    # Expected mapping of a plain (not analyzed) string field.
    #
    # @return [Hash]
    def not_analyzed_string
      { type: :string, index: :not_analyzed, fielddata: { format: :doc_values } }
    end

    # Expected mapping of an analyzed string: a multi_field holding the
    # not-analyzed variant under the field's own name plus an `analyzed` one.
    #
    # @param name [Symbol] the field name used as the key of the raw variant
    # @return [Hash]
    def analyzed_string(name)
      {
        type: :multi_field,
        fields: {
          name => not_analyzed_string,
          analyzed: { type: :string }
        }
      }
    end

    it "should build a single integer property" do
      result = Mapping.build do |mapping|
        mapping.type :a_type do |type|
          type.integer :an_integer
        end
      end
      expected_result = {
        a_type: {
          properties: {
            an_integer: doc_values_field(:integer)
          }
        }
      }
      expect(result).to eq expected_result
    end

    it "should build a single long property" do
      result = Mapping.build do |mapping|
        mapping.type :a_type do |type|
          type.long :a_long
        end
      end
      expected_result = {
        a_type: {
          properties: {
            a_long: doc_values_field(:long)
          }
        }
      }
      expect(result).to eq expected_result
    end

    it "should build a single date property" do
      result = Mapping.build do |mapping|
        mapping.type :a_type do |type|
          type.date :a_date
        end
      end
      expected_result = {
        a_type: {
          properties: {
            a_date: doc_values_field(:date)
          }
        }
      }
      expect(result).to eq expected_result
    end

    it "should build a single string property" do
      result = Mapping.build do |mapping|
        mapping.type :a_type do |type|
          type.string :a_string
        end
      end
      expected_result = {
        a_type: {
          properties: {
            a_string: not_analyzed_string
          }
        }
      }
      expect(result).to eq expected_result
    end

    it "should build a single analyzed string property" do
      result = Mapping.build do |mapping|
        mapping.type :a_type do |type|
          type.analyzed_string :a_string
        end
      end
      expected_result = {
        a_type: {
          properties: {
            a_string: analyzed_string(:a_string)
          }
        }
      }
      expect(result).to eq expected_result
    end

    it "should build a single bool property" do
      result = Mapping.build do |mapping|
        mapping.type :a_type do |type|
          type.bool :a_bool
        end
      end
      expected_result = {
        a_type: {
          properties: {
            a_bool: { type: :boolean }
          }
        }
      }
      expect(result).to eq expected_result
    end

    it "should build a single object" do
      result = Mapping.build do |mapping|
        mapping.type :a_type do |type|
          type.object :an_object do |object|
            object.string :a_string
          end
        end
      end
      expected_result = {
        a_type: {
          properties: {
            an_object: {
              type: :object,
              properties: {
                a_string: not_analyzed_string
              }
            }
          }
        }
      }
      expect(result).to eq expected_result
    end

    it "should build a single nested" do
      result = Mapping.build do |mapping|
        mapping.type :a_type do |type|
          type.nested :a_nested do |object|
            object.string :a_key
            object.long :a_value
          end
        end
      end
      expected_result = {
        a_type: {
          properties: {
            a_nested: {
              type: :nested,
              properties: {
                a_key: not_analyzed_string,
                a_value: doc_values_field(:long)
              }
            }
          }
        }
      }
      expect(result).to eq expected_result
    end

    it "should build a complex mapping" do
      result = Mapping.build do |mapping|
        mapping.type :post do |type|
          type.string :username
          type.analyzed_string :element_id
          type.object :day do |object|
            object.long :impressions
          end
          type.object :lifetime do |object|
            object.long :impressions
            object.object :reach do |reach|
              reach.object :origin do |origin|
                origin.string :name
                origin.string :from
              end
            end
          end
        end
        mapping.type :user do |type|
          type.date :date
        end
      end
      # NOTE(review): this example previously built an expectation but never
      # asserted it. The shape below follows the single-property examples in
      # this spec (multi_field keyed by the field's own name, one :object level
      # per nested `object` call, including `origin`); confirm against the
      # builders if the assertion fails.
      expected_result = {
        user: {
          properties: {
            date: doc_values_field(:date)
          }
        },
        post: {
          properties: {
            username: not_analyzed_string,
            element_id: analyzed_string(:element_id),
            day: {
              type: :object,
              properties: {
                impressions: doc_values_field(:long)
              }
            },
            lifetime: {
              type: :object,
              properties: {
                impressions: doc_values_field(:long),
                reach: {
                  type: :object,
                  properties: {
                    origin: {
                      type: :object,
                      properties: {
                        name: not_analyzed_string,
                        from: not_analyzed_string
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      expect(result).to eq expected_result
    end

  end
end