connectors_service 8.6.0.4.pre.20221104T200814Z → 8.6.0.4.pre.20221114T233727Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +6 -6
- data/lib/app/dispatcher.rb +12 -0
- data/lib/app/preflight_check.rb +11 -0
- data/lib/connectors/base/connector.rb +19 -12
- data/lib/connectors/base/simple_rules_parser.rb +42 -0
- data/lib/connectors/example/connector.rb +15 -0
- data/lib/connectors/gitlab/connector.rb +15 -1
- data/lib/connectors/mongodb/connector.rb +55 -36
- data/lib/connectors/mongodb/mongo_rules_parser.rb +81 -0
- data/lib/core/configuration.rb +3 -1
- data/lib/core/connector_job.rb +137 -0
- data/lib/core/connector_settings.rb +24 -11
- data/lib/core/elastic_connector_actions.rb +263 -24
- data/lib/core/filtering/post_process_engine.rb +39 -0
- data/lib/core/filtering/post_process_result.rb +27 -0
- data/lib/core/filtering/simple_rule.rb +141 -0
- data/lib/core/filtering/validation_job_runner.rb +53 -0
- data/lib/core/filtering/validation_status.rb +17 -0
- data/lib/core/filtering.rb +17 -0
- data/lib/core/ingestion/es_sink.rb +59 -0
- data/lib/core/ingestion/ingester.rb +90 -0
- data/lib/core/{output_sink.rb → ingestion.rb} +2 -5
- data/lib/core/scheduler.rb +40 -10
- data/lib/core/sync_job_runner.rb +65 -17
- data/lib/core.rb +2 -0
- data/lib/utility/bulk_queue.rb +85 -0
- data/lib/utility/constants.rb +2 -0
- data/lib/utility/filtering.rb +22 -0
- data/lib/utility/logger.rb +2 -1
- data/lib/utility.rb +5 -4
- metadata +16 -7
- data/lib/core/output_sink/base_sink.rb +0 -33
- data/lib/core/output_sink/combined_sink.rb +0 -38
- data/lib/core/output_sink/console_sink.rb +0 -51
- data/lib/core/output_sink/es_sink.rb +0 -74
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'connectors/connector_status'
|
11
|
+
require 'connectors/registry'
|
11
12
|
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility'
|
13
14
|
|
@@ -34,13 +35,15 @@ module Core
|
|
34
35
|
new(es_response, connectors_meta)
|
35
36
|
end
|
36
37
|
|
37
|
-
def initialize(es_response, connectors_meta)
|
38
|
-
@elasticsearch_response = es_response.with_indifferent_access
|
39
|
-
@connectors_meta = connectors_meta.with_indifferent_access
|
40
|
-
end
|
41
|
-
|
42
38
|
def self.fetch_native_connectors(page_size = DEFAULT_PAGE_SIZE)
|
43
|
-
query = {
|
39
|
+
query = {
|
40
|
+
bool: {
|
41
|
+
filter: [
|
42
|
+
{ term: { is_native: true } },
|
43
|
+
{ terms: { service_type: Connectors::REGISTRY.registered_connectors } }
|
44
|
+
]
|
45
|
+
}
|
46
|
+
}
|
44
47
|
fetch_connectors_by_query(query, page_size)
|
45
48
|
end
|
46
49
|
|
@@ -83,23 +86,26 @@ module Core
|
|
83
86
|
end
|
84
87
|
|
85
88
|
def filtering
|
86
|
-
|
89
|
+
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
90
|
+
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
91
|
+
|
92
|
+
Utility::Filtering.extract_filter(filtering)
|
87
93
|
end
|
88
94
|
|
89
95
|
def request_pipeline
|
90
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
96
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
91
97
|
end
|
92
98
|
|
93
99
|
def extract_binary_content?
|
94
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
100
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
95
101
|
end
|
96
102
|
|
97
103
|
def reduce_whitespace?
|
98
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
104
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
99
105
|
end
|
100
106
|
|
101
107
|
def run_ml_inference?
|
102
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
108
|
+
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
103
109
|
end
|
104
110
|
|
105
111
|
def formatted
|
@@ -116,6 +122,13 @@ module Core
|
|
116
122
|
index_name&.start_with?(Utility::Constants::CONTENT_INDEX_PREFIX)
|
117
123
|
end
|
118
124
|
|
125
|
+
private
|
126
|
+
|
127
|
+
def initialize(es_response, connectors_meta)
|
128
|
+
@elasticsearch_response = es_response.with_indifferent_access
|
129
|
+
@connectors_meta = connectors_meta.with_indifferent_access
|
130
|
+
end
|
131
|
+
|
119
132
|
def self.fetch_connectors_by_query(query, page_size)
|
120
133
|
connectors_meta = ElasticConnectorActions.connectors_meta
|
121
134
|
|
@@ -19,6 +19,12 @@ module Core
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
class JobNotCreatedError < StandardError
|
23
|
+
def initialize(connector_id, response)
|
24
|
+
super("Sync job for connector '#{connector_id}' could not be created. Response: #{response}")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
22
28
|
class ConnectorVersionChangedError < StandardError
|
23
29
|
def initialize(connector_id, seq_no, primary_term)
|
24
30
|
super("Version conflict: seq_no [#{seq_no}] and primary_term [#{primary_term}] do not match for connector '#{connector_id}'.")
|
@@ -43,10 +49,17 @@ module Core
|
|
43
49
|
end
|
44
50
|
|
45
51
|
def get_connector(connector_id)
|
52
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
46
53
|
client.get(:index => Utility::Constants::CONNECTORS_INDEX, :id => connector_id, :ignore => 404).with_indifferent_access
|
47
54
|
end
|
48
55
|
|
56
|
+
def get_job(job_id)
|
57
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
58
|
+
client.get(:index => Utility::Constants::JOB_INDEX, :id => job_id, :ignore => 404).with_indifferent_access
|
59
|
+
end
|
60
|
+
|
49
61
|
def connectors_meta
|
62
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
50
63
|
alias_mappings = client.indices.get_mapping(:index => Utility::Constants::CONNECTORS_INDEX).with_indifferent_access
|
51
64
|
index = get_latest_index_in_alias(Utility::Constants::CONNECTORS_INDEX, alias_mappings.keys)
|
52
65
|
alias_mappings.dig(index, 'mappings', '_meta') || {}
|
@@ -65,6 +78,19 @@ module Core
|
|
65
78
|
)
|
66
79
|
end
|
67
80
|
|
81
|
+
def search_jobs(query, page_size, offset)
|
82
|
+
client.search(
|
83
|
+
:index => Utility::Constants::JOB_INDEX,
|
84
|
+
:ignore => 404,
|
85
|
+
:body => {
|
86
|
+
:size => page_size,
|
87
|
+
:from => offset,
|
88
|
+
:query => query,
|
89
|
+
:sort => ['created_at']
|
90
|
+
}
|
91
|
+
)
|
92
|
+
end
|
93
|
+
|
68
94
|
def update_connector_configuration(connector_id, configuration)
|
69
95
|
update_connector_fields(connector_id, :configuration => configuration)
|
70
96
|
end
|
@@ -84,6 +110,28 @@ module Core
|
|
84
110
|
update_connector_configuration(connector_id, payload)
|
85
111
|
end
|
86
112
|
|
113
|
+
def update_filtering_validation(connector_id, filter_validation_results)
|
114
|
+
return if filter_validation_results.empty?
|
115
|
+
|
116
|
+
filtering = get_connector(connector_id).dig(:_source, :filtering)
|
117
|
+
|
118
|
+
case filtering
|
119
|
+
when Hash
|
120
|
+
update_filter_validation(filtering, filter_validation_results)
|
121
|
+
when Array
|
122
|
+
return unless should_update_validations?(filter_validation_results, filtering)
|
123
|
+
|
124
|
+
filtering.each do |filter|
|
125
|
+
update_filter_validation(filter, filter_validation_results)
|
126
|
+
end
|
127
|
+
else
|
128
|
+
Utility::Logger.warn("Elasticsearch returned invalid filtering format: #{filtering}. Skipping validation.")
|
129
|
+
return
|
130
|
+
end
|
131
|
+
|
132
|
+
update_connector_fields(connector_id, { :filtering => filtering })
|
133
|
+
end
|
134
|
+
|
87
135
|
def claim_job(connector_id)
|
88
136
|
seq_no = nil
|
89
137
|
primary_term = nil
|
@@ -111,24 +159,38 @@ module Core
|
|
111
159
|
)
|
112
160
|
|
113
161
|
body = {
|
114
|
-
:connector_id => connector_id,
|
115
162
|
:status => Connectors::SyncStatus::IN_PROGRESS,
|
116
163
|
:worker_hostname => Socket.gethostname,
|
117
164
|
:created_at => Time.now,
|
118
|
-
:
|
165
|
+
:started_at => Time.now,
|
166
|
+
:last_seen => Time.now,
|
167
|
+
:connector => {
|
168
|
+
:id => connector_id,
|
169
|
+
:filtering => convert_connector_filtering_to_job_filtering(connector_record.dig('_source', 'filtering'))
|
170
|
+
}
|
119
171
|
}
|
120
172
|
|
121
|
-
client.index(:index => Utility::Constants::JOB_INDEX, :body => body)
|
173
|
+
index_response = client.index(:index => Utility::Constants::JOB_INDEX, :body => body, :refresh => true)
|
174
|
+
if index_response['result'] == 'created'
|
175
|
+
# TODO: remove the usage of with_indifferent_access. Ideally this should return a hash or nil if not found
|
176
|
+
return client.get(
|
177
|
+
:index => Utility::Constants::JOB_INDEX,
|
178
|
+
:id => index_response['_id'],
|
179
|
+
:ignore => 404
|
180
|
+
).with_indifferent_access
|
181
|
+
end
|
182
|
+
raise JobNotCreatedError.new(connector_id, index_response)
|
122
183
|
end
|
123
184
|
|
124
185
|
def convert_connector_filtering_to_job_filtering(connector_filtering)
|
125
186
|
return [] unless connector_filtering
|
126
187
|
connector_filtering = [connector_filtering] unless connector_filtering.is_a?(Array)
|
127
188
|
connector_filtering.each_with_object([]) do |filtering_domain, job_filtering|
|
189
|
+
snippet = filtering_domain.dig('active', 'advanced_snippet') || {}
|
128
190
|
job_filtering << {
|
129
191
|
'domain' => filtering_domain['domain'],
|
130
192
|
'rules' => filtering_domain.dig('active', 'rules'),
|
131
|
-
'advanced_snippet' =>
|
193
|
+
'advanced_snippet' => snippet['value'] || snippet,
|
132
194
|
'warnings' => [] # TODO: in https://github.com/elastic/enterprise-search-team/issues/3174
|
133
195
|
}
|
134
196
|
end
|
@@ -145,22 +207,33 @@ module Core
|
|
145
207
|
update_connector_fields(connector_id, body)
|
146
208
|
end
|
147
209
|
|
148
|
-
def
|
149
|
-
|
210
|
+
# Records a heartbeat for a running sync job and stores any progress metadata on it.
#
# Sends a partial-document update for +job_id+ to the job index. The document always
# carries a fresh :last_seen timestamp; entries in +metadata+ are merged on top, so a
# key present in +metadata+ wins over the default.
#
# job_id   - id of the job document to update.
# metadata - Hash of extra fields to write, or nil for a bare heartbeat.
#
# Returns whatever client.update returns.
def update_sync(job_id, metadata)
  body = {
    # metadata may legitimately be nil (complete_sync guards the same way);
    # Hash#merge(nil) would raise TypeError.
    :doc => { :last_seen => Time.now }.merge(metadata || {})
  }
  client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
end
|
216
|
+
|
217
|
+
def complete_sync(connector_id, job_id, metadata, error)
|
218
|
+
sync_status = error ? Connectors::SyncStatus::ERROR : Connectors::SyncStatus::COMPLETED
|
219
|
+
|
220
|
+
metadata ||= {}
|
150
221
|
|
151
222
|
update_connector_fields(connector_id,
|
152
223
|
:last_sync_status => sync_status,
|
153
|
-
:last_sync_error =>
|
154
|
-
:error =>
|
224
|
+
:last_sync_error => error,
|
225
|
+
:error => error,
|
155
226
|
:last_synced => Time.now,
|
156
|
-
:last_indexed_document_count =>
|
157
|
-
:last_deleted_document_count =>
|
227
|
+
:last_indexed_document_count => metadata[:indexed_document_count],
|
228
|
+
:last_deleted_document_count => metadata[:deleted_document_count])
|
158
229
|
|
159
230
|
body = {
|
160
231
|
:doc => {
|
161
232
|
:status => sync_status,
|
162
|
-
:completed_at => Time.now
|
163
|
-
|
233
|
+
:completed_at => Time.now,
|
234
|
+
:last_seen => Time.now,
|
235
|
+
:error => error
|
236
|
+
}.merge(metadata)
|
164
237
|
}
|
165
238
|
client.update(:index => Utility::Constants::JOB_INDEX, :id => job_id, :body => body)
|
166
239
|
end
|
@@ -248,12 +321,105 @@ module Core
|
|
248
321
|
:properties => {
|
249
322
|
:api_key_id => { :type => :keyword },
|
250
323
|
:configuration => { :type => :object },
|
251
|
-
:
|
324
|
+
:description => { :type => :text },
|
325
|
+
:error => { :type => :keyword },
|
326
|
+
:features => {
|
327
|
+
:properties => {
|
328
|
+
:filtering_advanced_config => { :type => :boolean },
|
329
|
+
:filtering_rules => { :type => :boolean }
|
330
|
+
}
|
331
|
+
},
|
332
|
+
:filtering => {
|
333
|
+
:properties => {
|
334
|
+
:domain => { :type => :keyword },
|
335
|
+
:active => {
|
336
|
+
:properties => {
|
337
|
+
:rules => {
|
338
|
+
:properties => {
|
339
|
+
:id => { :type => :keyword },
|
340
|
+
:policy => { :type => :keyword },
|
341
|
+
:field => { :type => :keyword },
|
342
|
+
:rule => { :type => :keyword },
|
343
|
+
:value => { :type => :keyword },
|
344
|
+
:order => { :type => :short },
|
345
|
+
:created_at => { :type => :date },
|
346
|
+
:updated_at => { :type => :date }
|
347
|
+
}
|
348
|
+
},
|
349
|
+
:advanced_snippet => {
|
350
|
+
:properties => {
|
351
|
+
:value => { :type => :object },
|
352
|
+
:created_at => { :type => :date },
|
353
|
+
:updated_at => { :type => :date }
|
354
|
+
}
|
355
|
+
},
|
356
|
+
:validation => {
|
357
|
+
:properties => {
|
358
|
+
:state => { :type => :keyword },
|
359
|
+
:errors => {
|
360
|
+
:properties => {
|
361
|
+
:ids => { :type => :keyword },
|
362
|
+
:messages => { :type => :text }
|
363
|
+
}
|
364
|
+
}
|
365
|
+
}
|
366
|
+
}
|
367
|
+
}
|
368
|
+
},
|
369
|
+
:draft => {
|
370
|
+
:properties => {
|
371
|
+
:rules => {
|
372
|
+
:properties => {
|
373
|
+
:id => { :type => :keyword },
|
374
|
+
:policy => { :type => :keyword },
|
375
|
+
:field => { :type => :keyword },
|
376
|
+
:rule => { :type => :keyword },
|
377
|
+
:value => { :type => :keyword },
|
378
|
+
:order => { :type => :short },
|
379
|
+
:created_at => { :type => :date },
|
380
|
+
:updated_at => { :type => :date }
|
381
|
+
}
|
382
|
+
},
|
383
|
+
:advanced_snippet => {
|
384
|
+
:properties => {
|
385
|
+
:value => { :type => :object },
|
386
|
+
:created_at => { :type => :date },
|
387
|
+
:updated_at => { :type => :date }
|
388
|
+
}
|
389
|
+
},
|
390
|
+
:validation => {
|
391
|
+
:properties => {
|
392
|
+
:state => { :type => :keyword },
|
393
|
+
:errors => {
|
394
|
+
:properties => {
|
395
|
+
:ids => { :type => :keyword },
|
396
|
+
:messages => { :type => :text }
|
397
|
+
}
|
398
|
+
}
|
399
|
+
}
|
400
|
+
}
|
401
|
+
}
|
402
|
+
}
|
403
|
+
}
|
404
|
+
},
|
252
405
|
:index_name => { :type => :keyword },
|
406
|
+
:is_native => { :type => :boolean },
|
407
|
+
:language => { :type => :keyword },
|
253
408
|
:last_seen => { :type => :date },
|
409
|
+
:last_sync_error => { :type => :keyword },
|
410
|
+
:last_sync_status => { :type => :keyword },
|
254
411
|
:last_synced => { :type => :date },
|
255
|
-
:
|
256
|
-
:
|
412
|
+
:last_deleted_document_count => { :type => :long },
|
413
|
+
:last_indexed_document_count => { :type => :long },
|
414
|
+
:name => { :type => :keyword },
|
415
|
+
:pipeline => {
|
416
|
+
:properties => {
|
417
|
+
:extract_binary_content => { :type => :boolean },
|
418
|
+
:name => { :type => :keyword },
|
419
|
+
:reduce_whitespace => { :type => :boolean },
|
420
|
+
:run_ml_inference => { :type => :boolean }
|
421
|
+
}
|
422
|
+
},
|
257
423
|
:scheduling => {
|
258
424
|
:properties => {
|
259
425
|
:enabled => { :type => :boolean },
|
@@ -262,9 +428,7 @@ module Core
|
|
262
428
|
},
|
263
429
|
:service_type => { :type => :keyword },
|
264
430
|
:status => { :type => :keyword },
|
265
|
-
:
|
266
|
-
:sync_now => { :type => :boolean },
|
267
|
-
:sync_status => { :type => :keyword }
|
431
|
+
:sync_now => { :type => :boolean }
|
268
432
|
}
|
269
433
|
}
|
270
434
|
ensure_index_exists("#{Utility::Constants::CONNECTORS_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::CONNECTORS_INDEX, :mappings => mappings))
|
@@ -275,14 +439,68 @@ module Core
|
|
275
439
|
def ensure_job_index_exists
|
276
440
|
mappings = {
|
277
441
|
:properties => {
|
278
|
-
:
|
279
|
-
:
|
442
|
+
:cancelation_requested_at => { :type => :date },
|
443
|
+
:canceled_at => { :type => :date },
|
444
|
+
:completed_at => { :type => :date },
|
445
|
+
:connector => {
|
446
|
+
:properties => {
|
447
|
+
:configuration => { :type => :object },
|
448
|
+
:filtering => {
|
449
|
+
:properties => {
|
450
|
+
:domain => { :type => :keyword },
|
451
|
+
:rules => {
|
452
|
+
:properties => {
|
453
|
+
:id => { :type => :keyword },
|
454
|
+
:policy => { :type => :keyword },
|
455
|
+
:field => { :type => :keyword },
|
456
|
+
:rule => { :type => :keyword },
|
457
|
+
:value => { :type => :keyword },
|
458
|
+
:order => { :type => :short },
|
459
|
+
:created_at => { :type => :date },
|
460
|
+
:updated_at => { :type => :date }
|
461
|
+
}
|
462
|
+
},
|
463
|
+
:advanced_snippet => {
|
464
|
+
:properties => {
|
465
|
+
:value => { :type => :object },
|
466
|
+
:created_at => { :type => :date },
|
467
|
+
:updated_at => { :type => :date }
|
468
|
+
}
|
469
|
+
},
|
470
|
+
:warnings => {
|
471
|
+
:properties => {
|
472
|
+
:ids => { :type => :keyword },
|
473
|
+
:messages => { :type => :text }
|
474
|
+
}
|
475
|
+
}
|
476
|
+
}
|
477
|
+
},
|
478
|
+
:id => { :type => :keyword },
|
479
|
+
:index_name => { :type => :keyword },
|
480
|
+
:language => { :type => :keyword },
|
481
|
+
:pipeline => {
|
482
|
+
:properties => {
|
483
|
+
:extract_binary_content => { :type => :boolean },
|
484
|
+
:name => { :type => :keyword },
|
485
|
+
:reduce_whitespace => { :type => :boolean },
|
486
|
+
:run_ml_inference => { :type => :boolean }
|
487
|
+
}
|
488
|
+
},
|
489
|
+
:service_type => { :type => :keyword }
|
490
|
+
}
|
491
|
+
},
|
492
|
+
:created_at => { :type => :date },
|
493
|
+
:deleted_document_count => { :type => :integer },
|
280
494
|
:error => { :type => :text },
|
281
|
-
:worker_hostname => { :type => :keyword },
|
282
495
|
:indexed_document_count => { :type => :integer },
|
283
|
-
:
|
284
|
-
:
|
285
|
-
:
|
496
|
+
:indexed_document_volume => { :type => :integer },
|
497
|
+
:last_seen => { :type => :date },
|
498
|
+
:metadata => { :type => :object },
|
499
|
+
:started_at => { :type => :date },
|
500
|
+
:status => { :type => :keyword },
|
501
|
+
:total_document_count => { :type => :integer },
|
502
|
+
:trigger_method => { :type => :keyword },
|
503
|
+
:worker_hostname => { :type => :keyword }
|
286
504
|
}
|
287
505
|
}
|
288
506
|
ensure_index_exists("#{Utility::Constants::JOB_INDEX}-v1", system_index_body(:alias_name => Utility::Constants::JOB_INDEX, :mappings => mappings))
|
@@ -313,8 +531,20 @@ module Core
|
|
313
531
|
end
|
314
532
|
end
|
315
533
|
|
534
|
+
def document_count(index_name)
|
535
|
+
client.count(:index => index_name)['count']
|
536
|
+
end
|
537
|
+
|
316
538
|
private
|
317
539
|
|
540
|
+
# Tells whether at least one filter in +filtering+ belongs to a domain that has a
# validation result in +domain_validations+.
#
# domain_validations - Hash keyed by domain name.
# filtering          - Array of filter hashes, each read through filter[:domain].
#
# Uses the same key? lookup as update_filter_validation, so this guard and the
# per-filter update cannot disagree about which domains are present.
#
# Returns true or false.
def should_update_validations?(domain_validations, filtering)
  filtering.any? { |filter| domain_validations.key?(filter[:domain]) }
end
|
547
|
+
|
318
548
|
def client
|
319
549
|
@client ||= Utility::EsClient.new(App::Config[:elasticsearch])
|
320
550
|
end
|
@@ -324,6 +554,15 @@ module Core
|
|
324
554
|
index_version = index_versions.max # gets the largest suffix number
|
325
555
|
"#{alias_name}-v#{index_version}"
|
326
556
|
end
|
557
|
+
|
558
|
+
def update_filter_validation(filter, domain_validations)
|
559
|
+
domain = filter[:domain]
|
560
|
+
|
561
|
+
if domain_validations.key?(domain)
|
562
|
+
new_validation_state = { :draft => { :validation => domain_validations[domain] } }
|
563
|
+
filter.deep_merge!(new_validation_state)
|
564
|
+
end
|
565
|
+
end
|
327
566
|
end
|
328
567
|
end
|
329
568
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'core/filtering'
|
10
|
+
require 'utility/filtering'
|
11
|
+
|
12
|
+
module Core
|
13
|
+
module Filtering
|
14
|
+
# Applies the ordered simple filtering rules of a sync job to documents.
#
# The rules are read once, at construction time, from the job description under
# 'connector' -> 'filtering'. The rule whose id is SimpleRule::DEFAULT_RULE_ID is
# left out of the list; SimpleRule::DEFAULT_RULE is used as the fallback instead.
class PostProcessEngine
  attr_reader :rules

  # job_description - Hash-like job document; only 'connector' -> 'filtering' is read.
  def initialize(job_description)
    @rules = ordered_rules(job_description.dig('connector', 'filtering'))
  end

  # Finds the first rule (in ascending 'order') that matches +document+.
  #
  # document - Hash with String or Symbol keys. Rules are matched against a
  #            string-keyed copy; the original object is handed to the result.
  #
  # Returns a PostProcessResult for the first matching rule, or for
  # SimpleRule::DEFAULT_RULE when no rule matches.
  def process(document)
    # Stringify once instead of once per rule.
    stringified = document.transform_keys(&:to_s)
    matching_rule = @rules.find { |rule| rule.match?(stringified) } || SimpleRule::DEFAULT_RULE
    PostProcessResult.new(document, matching_rule)
  end

  private

  # Builds the SimpleRule list for the job's filter, sorted by each rule's 'order'.
  #
  # A filter without a 'rules' entry yields an empty list. The default rule is
  # dropped before sorting so that its 'order' value never takes part in the sort.
  def ordered_rules(job_filtering)
    # NOTE(review): extract_filter is defined elsewhere; the nil guard is defensive
    # in case it returns nil for missing filtering - confirm against its contract.
    filter = Utility::Filtering.extract_filter(job_filtering) || {}
    job_rules = filter['rules'] || []

    job_rules
      .reject { |rule| rule['id'] == Core::Filtering::SimpleRule::DEFAULT_RULE_ID }
      .sort_by { |rule| rule['order'] }
      .map { |rule| SimpleRule.new(rule) }
  end
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
|
12
|
+
module Filtering
|
13
|
+
# Outcome of running a document through the post-processing rules: the document
# itself plus the rule that decided its fate.
class PostProcessResult
  attr_reader :document, :matching_rule

  # document      - the document as handed to the engine (String or Symbol keys).
  # matching_rule - the rule that matched; must respond to id, policy and is_include?.
  #
  # Emits a debug log line naming the document id and the matched rule.
  def initialize(document, matching_rule)
    @document = document
    @matching_rule = matching_rule
    # The engine matches against a string-keyed copy but passes the original
    # document here, so the id may sit under either 'id' or :id.
    document_id = document['id'] || document[:id]
    Utility::Logger.debug("Document '#{document_id}' matched filtering rule: #{matching_rule.id}. It will be #{matching_rule.policy}d")
  end

  # Returns whether the matching rule's policy is to include the document.
  def is_include?
    matching_rule.is_include?
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require 'utility/logger'
|
10
|
+
|
11
|
+
module Core
|
12
|
+
module Filtering
|
13
|
+
# A single include/exclude filtering rule that tests one document field against a value.
#
# A rule is built from a string-keyed hash with the keys 'policy', 'field', 'rule',
# 'value' and 'id'. The rule whose id is DEFAULT_RULE_ID matches every document.
class SimpleRule
  DEFAULT_RULE_ID = 'DEFAULT'

  # Supported values of the 'policy' key.
  class Policy
    INCLUDE = 'include'
    EXCLUDE = 'exclude'
  end

  # Supported values of the 'rule' key (the comparison operator).
  class Rule
    REGEX = 'regex'
    EQUALS = 'equals'
    STARTS_WITH = 'starts_with'
    ENDS_WITH = 'ends_with'
    CONTAINS = 'contains'
    LESS_THAN = '<'
    GREATER_THAN = '>'
  end

  attr_reader :policy, :field, :rule, :value, :id

  # rule_hash - Hash with String keys 'policy', 'field', 'rule', 'value', 'id'.
  #
  # Raises RuntimeError ("<key> is required") when one of the keys is missing.
  def initialize(rule_hash)
    @policy = rule_hash.fetch('policy')
    @field = rule_hash.fetch('field')
    @rule = rule_hash.fetch('rule')
    @value = rule_hash.fetch('value')
    @id = rule_hash.fetch('id')
    @rule_hash = rule_hash
  rescue KeyError => e
    raise "#{e.key} is required"
  end

  # Convenience constructor taking the rule attributes positionally.
  def self.from_args(id, policy, field, rule, value)
    SimpleRule.new(
      {
        'id' => id,
        'policy' => policy,
        'field' => field,
        'rule' => rule,
        'value' => value
      }
    )
  end

  # Catch-all rule: includes every document.
  DEFAULT_RULE = SimpleRule.new(
    'policy' => 'include',
    'field' => '_',
    'rule' => 'regex',
    'value' => '.*',
    'id' => SimpleRule::DEFAULT_RULE_ID
  )

  # Tests whether +document+ satisfies this rule.
  #
  # document - Hash keyed by field name (String keys, as +field+ is looked up as-is).
  #
  # The rule value is first coerced to the type of the document value (see #coerce).
  # A missing document field, an unknown operator, or values that cannot be ordered
  # against each other ('<' / '>') all count as "no match".
  #
  # Returns true or false.
  def match?(document)
    return true if id == DEFAULT_RULE_ID

    doc_value = document[field]
    return false if doc_value.nil?

    coerced_value = coerce(doc_value)
    case rule
    when Rule::EQUALS
      equal_values?(doc_value, coerced_value)
    when Rule::STARTS_WITH
      doc_value.to_s.start_with?(value)
    when Rule::ENDS_WITH
      doc_value.to_s.end_with?(value)
    when Rule::CONTAINS
      doc_value.to_s.include?(value)
    when Rule::REGEX
      # match? keeps this predicate boolean and skips building MatchData.
      doc_value.to_s.match?(/#{value}/)
    when Rule::LESS_THAN
      order = compare(doc_value, coerced_value)
      !order.nil? && order < 0
    when Rule::GREATER_THAN
      order = compare(doc_value, coerced_value)
      !order.nil? && order > 0
    else
      false
    end
  end

  # Converts the rule's value to the type of +doc_value+ so both can be compared.
  #
  # String -> String, Integer -> Integer, Float -> Float, DateTime/Time -> parsed
  # date (see #to_date), true/false -> 'true'/'false', anything else -> String.
  # If the conversion raises, the failure is logged at debug level and the value's
  # String form is returned.
  def coerce(doc_value)
    case doc_value
    when String
      value.to_s
    when Integer
      value.to_i
    when Float
      # Without this branch a Float document was compared against a String,
      # which raises for '<' and '>'.
      value.to_f
    when DateTime, Time
      to_date(value)
    when TrueClass, FalseClass # Ruby doesn't have a Boolean type, TIL
      to_bool(value).to_s
    else
      value.to_s
    end
  rescue StandardError => e
    Utility::Logger.debug("Failed to coerce value '#{value}' (#{value.class}) based on document value '#{doc_value}' (#{doc_value.class}) due to error: #{e.class}: #{e.message}")
    value.to_s
  end

  def is_include?
    policy == Policy::INCLUDE
  end

  def is_exclude?
    policy == Policy::EXCLUDE
  end

  # Returns the hash this rule was built from.
  def to_h
    @rule_hash
  end

  private

  # Equality for the 'equals' operator, dispatched on the coerced value's type.
  def equal_values?(doc_value, coerced_value)
    case coerced_value
    when Numeric
      doc_value == coerced_value
    when DateTime, Time
      # Compare instants at whole-second resolution. Comparing to_s output never
      # matched a Time against a DateTime (different formats) and also depended
      # on the zone offset each side happened to carry.
      doc_value.to_time.to_i == coerced_value.to_time.to_i
    else
      doc_value.to_s == coerced_value
    end
  end

  # Three-way comparison for '<' and '>'.
  #
  # DateTime operands are converted to Time first so Time and DateTime can be
  # ordered against each other. Returns nil when the operands are not comparable.
  def compare(doc_value, coerced_value)
    left = doc_value.is_a?(DateTime) ? doc_value.to_time : doc_value
    right = coerced_value.is_a?(DateTime) ? coerced_value.to_time : coerced_value
    left <=> right
  end

  # Interprets +str+ as a boolean.
  #
  # Accepts true/false themselves and, case-insensitively, true/t/yes/y/on/1 and
  # false/f/no/n/off/0; nil and blank text count as false. Non-String input is
  # matched through its String form.
  #
  # Raises ArgumentError for anything else.
  def to_bool(str)
    return str if str == true || str == false

    text = str.to_s
    # \A and \z anchor the whole string; ^ and $ would accept "true\nanything".
    return true if text.match?(/\A(true|t|yes|y|on|1)\z/i)
    return false if text.match?(/\A[[:space:]]*\z/) || text.match?(/\A(false|f|no|n|off|0)\z/i)

    raise ArgumentError, "invalid value for Boolean: \"#{str}\""
  end

  # Parses +str+ as a date/time; when it is not parseable, falls back to treating
  # it as an integer epoch.
  def to_date(str)
    # to_s so that a non-String value takes the fallback below instead of a TypeError.
    DateTime.parse(str.to_s)
  rescue ArgumentError
    # NOTE(review): Time.at interprets its argument as SECONDS since the epoch.
    # The previous comment said "millis" - confirm which unit rule values use.
    Time.at(str.to_i)
  end
end
|
140
|
+
end
|
141
|
+
end
|