connectors_utility 8.6.0.7 → 8.7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/connectors/crawler/scheduler.rb +36 -0
- data/lib/core/connector_job.rb +4 -5
- data/lib/core/connector_settings.rb +26 -0
- data/lib/core/elastic_connector_actions.rb +32 -4
- data/lib/core/scheduler.rb +66 -51
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/es_client.rb +4 -0
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8bde5d9fcfd7af80dd1a20bc3fdffb3e509af46fc492607fa963858aacdb79bc
|
4
|
+
data.tar.gz: b3f26fba69d08e1add58b476a37a74f3fa855790d9bcea05c7a4f5ed3b1fd9bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1eb4c63b6ae46d11b8b8e01224e1e1943a1971b7e12054978ebab97c99939fb1d0316b3ff06912d4908e8f34df482477e4e8d1f9b53c02fcebc809e27b597d2a
|
7
|
+
data.tar.gz: 2300a3a9c32ed95a1c25a54fba4737bb052caee10756887f2ac114948ec8a8ba0df066ee30420f5e3d64e0f5855df7e487f4782bc242e27ea66b592c6c60dbe6
|
data/lib/connectors/crawler/scheduler.rb
CHANGED
@@ -22,11 +22,47 @@ module Connectors
|
|
22
22
|
[]
|
23
23
|
end
|
24
24
|
|
25
|
+
def when_triggered
|
26
|
+
loop do
|
27
|
+
connector_settings.each do |cs|
|
28
|
+
# crawler only supports :sync
|
29
|
+
if sync_triggered?(cs)
|
30
|
+
yield cs, :sync, nil
|
31
|
+
next
|
32
|
+
end
|
33
|
+
|
34
|
+
schedule_key = custom_schedule_triggered(cs)
|
35
|
+
yield cs, :sync, schedule_key if schedule_key
|
36
|
+
end
|
37
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
38
|
+
log_authorization_error(e)
|
39
|
+
rescue StandardError => e
|
40
|
+
log_standard_error(e)
|
41
|
+
ensure
|
42
|
+
if @is_shutting_down
|
43
|
+
break
|
44
|
+
end
|
45
|
+
sleep_for_poll_interval
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
25
49
|
private
|
26
50
|
|
27
51
|
def connector_registered?(service_type)
|
28
52
|
service_type == 'elastic-crawler'
|
29
53
|
end
|
54
|
+
|
55
|
+
# custom scheduling has no ordering, so the first-found schedule is returned
|
56
|
+
def custom_schedule_triggered(cs)
|
57
|
+
cs.custom_scheduling_settings.each do |key, custom_scheduling|
|
58
|
+
identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
|
59
|
+
if schedule_triggered?(custom_scheduling, identifier)
|
60
|
+
return key
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
nil
|
65
|
+
end
|
30
66
|
end
|
31
67
|
end
|
32
68
|
end
|
data/lib/core/connector_job.rb
CHANGED
@@ -15,7 +15,7 @@ require 'utility'
|
|
15
15
|
module Core
|
16
16
|
class ConnectorJob
|
17
17
|
DEFAULT_PAGE_SIZE = 100
|
18
|
-
|
18
|
+
IDLE_THRESHOLD = 60
|
19
19
|
|
20
20
|
def self.fetch_by_id(job_id)
|
21
21
|
es_response = ElasticConnectorActions.get_job(job_id)
|
@@ -36,8 +36,7 @@ module Core
|
|
36
36
|
fetch_jobs_by_query(query, page_size)
|
37
37
|
end
|
38
38
|
|
39
|
-
def self.orphaned_jobs(page_size = DEFAULT_PAGE_SIZE)
|
40
|
-
connector_ids = ConnectorSettings.fetch_all_connectors.map(&:id)
|
39
|
+
def self.orphaned_jobs(connector_ids = [], page_size = DEFAULT_PAGE_SIZE)
|
41
40
|
query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
|
42
41
|
fetch_jobs_by_query(query, page_size)
|
43
42
|
end
|
@@ -47,7 +46,7 @@ module Core
|
|
47
46
|
ElasticConnectorActions.delete_jobs_by_query(query)
|
48
47
|
end
|
49
48
|
|
50
|
-
def self.
|
49
|
+
def self.idle_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
|
51
50
|
connector_ids = if connector_id
|
52
51
|
[connector_id]
|
53
52
|
else
|
@@ -58,7 +57,7 @@ module Core
|
|
58
57
|
filter: [
|
59
58
|
{ terms: { 'connector.id': connector_ids } },
|
60
59
|
{ terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
|
61
|
-
{ range: { last_seen: { lte: "now-#{
|
60
|
+
{ range: { last_seen: { lte: "now-#{IDLE_THRESHOLD}s" } } }
|
62
61
|
]
|
63
62
|
}
|
64
63
|
}
|
data/lib/core/connector_settings.rb
CHANGED
@@ -64,6 +64,24 @@ module Core
|
|
64
64
|
@elasticsearch_response[:_source][property_name]
|
65
65
|
end
|
66
66
|
|
67
|
+
def features
|
68
|
+
self[:features] || {}
|
69
|
+
end
|
70
|
+
|
71
|
+
# .dig version is the modern features way of doing things,
|
72
|
+
# Right-hand of OR operator is legacy features support
|
73
|
+
# When this is fixed with a migration, we can go ahead
|
74
|
+
def filtering_rule_feature_enabled?
|
75
|
+
!!features.dig(:sync_rules, :basic, :enabled) || !!features[:filtering_rules]
|
76
|
+
end
|
77
|
+
def filtering_advanced_config_feature_enabled?
|
78
|
+
!!features.dig(:sync_rules, :advanced, :enabled) || !!features[:filtering_advanced_config]
|
79
|
+
end
|
80
|
+
|
81
|
+
def any_filtering_feature_enabled?
|
82
|
+
filtering_rule_feature_enabled? || filtering_advanced_config_feature_enabled?
|
83
|
+
end
|
84
|
+
|
67
85
|
def index_name
|
68
86
|
self[:index_name]
|
69
87
|
end
|
@@ -88,10 +106,18 @@ module Core
|
|
88
106
|
self[:scheduling]
|
89
107
|
end
|
90
108
|
|
109
|
+
def custom_scheduling_settings
|
110
|
+
self[:custom_scheduling]
|
111
|
+
end
|
112
|
+
|
91
113
|
def sync_now?
|
92
114
|
self[:sync_now] == true
|
93
115
|
end
|
94
116
|
|
117
|
+
def last_synced
|
118
|
+
self[:last_synced]
|
119
|
+
end
|
120
|
+
|
95
121
|
def filtering
|
96
122
|
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
97
123
|
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -157,12 +157,37 @@ module Core
|
|
157
157
|
)
|
158
158
|
end
|
159
159
|
|
160
|
-
def
|
160
|
+
def update_connector_sync_start(connector_id)
|
161
161
|
doc = connector_with_concurrency_control(connector_id)
|
162
162
|
|
163
|
+
body = {
|
164
|
+
last_sync_status: Connectors::SyncStatus::IN_PROGRESS,
|
165
|
+
last_sync_error: nil,
|
166
|
+
status: Connectors::ConnectorStatus::CONNECTED
|
167
|
+
}
|
168
|
+
|
163
169
|
update_connector_fields(
|
164
170
|
connector_id,
|
165
|
-
|
171
|
+
body,
|
172
|
+
doc[:seq_no],
|
173
|
+
doc[:primary_term]
|
174
|
+
)
|
175
|
+
end
|
176
|
+
|
177
|
+
def update_connector_custom_scheduling_last_synced(connector_id, schedule_key)
|
178
|
+
doc = connector_with_concurrency_control(connector_id)
|
179
|
+
|
180
|
+
body = {
|
181
|
+
:custom_scheduling => {
|
182
|
+
schedule_key => {
|
183
|
+
:last_synced => Time.now
|
184
|
+
}
|
185
|
+
}
|
186
|
+
}
|
187
|
+
|
188
|
+
update_connector_fields(
|
189
|
+
connector_id,
|
190
|
+
body,
|
166
191
|
doc[:seq_no],
|
167
192
|
doc[:primary_term]
|
168
193
|
)
|
@@ -314,9 +339,11 @@ module Core
|
|
314
339
|
# Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
|
315
340
|
def ensure_connectors_index_exists
|
316
341
|
mappings = {
|
342
|
+
:dynamic => false,
|
317
343
|
:properties => {
|
318
344
|
:api_key_id => { :type => :keyword },
|
319
345
|
:configuration => { :type => :object },
|
346
|
+
:custom_schedule => { :type => :object },
|
320
347
|
:description => { :type => :text },
|
321
348
|
:error => { :type => :keyword },
|
322
349
|
:features => {
|
@@ -434,6 +461,7 @@ module Core
|
|
434
461
|
# Creation of job index should be handled by Kibana, this method is only used by ftest.rb
|
435
462
|
def ensure_job_index_exists
|
436
463
|
mappings = {
|
464
|
+
:dynamic => false,
|
437
465
|
:properties => {
|
438
466
|
:cancelation_requested_at => { :type => :date },
|
439
467
|
:canceled_at => { :type => :date },
|
@@ -511,8 +539,8 @@ module Core
|
|
511
539
|
end
|
512
540
|
|
513
541
|
def document_count(index_name)
|
514
|
-
client.indices.refresh(:index => index_name)
|
515
|
-
client.count(:index => index_name)['count']
|
542
|
+
client.indices.refresh(:index => index_name, :ignore_unavailable => true)
|
543
|
+
client.count(:index => index_name, :ignore_unavailable => true)['count']
|
516
544
|
end
|
517
545
|
|
518
546
|
private
|
data/lib/core/scheduler.rb
CHANGED
@@ -44,17 +44,14 @@ module Core
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
47
|
-
|
47
|
+
log_authorization_error(e)
|
48
48
|
rescue StandardError => e
|
49
|
-
|
49
|
+
log_standard_error(e)
|
50
50
|
ensure
|
51
51
|
if @is_shutting_down
|
52
52
|
break
|
53
53
|
end
|
54
|
-
|
55
|
-
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
56
|
-
sleep(@poll_interval)
|
57
|
-
end
|
54
|
+
sleep_for_poll_interval
|
58
55
|
end
|
59
56
|
end
|
60
57
|
|
@@ -83,51 +80,7 @@ module Core
|
|
83
80
|
return true
|
84
81
|
end
|
85
82
|
|
86
|
-
|
87
|
-
scheduling_settings = connector_settings.scheduling_settings
|
88
|
-
unless scheduling_settings.present? && scheduling_settings[:enabled] == true
|
89
|
-
Utility::Logger.debug("#{connector_settings.formatted.capitalize} scheduling is disabled.")
|
90
|
-
return false
|
91
|
-
end
|
92
|
-
|
93
|
-
current_schedule = scheduling_settings[:interval]
|
94
|
-
|
95
|
-
# Don't sync if there is no actual scheduling interval
|
96
|
-
if current_schedule.nil? || current_schedule.empty?
|
97
|
-
Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
|
98
|
-
return false
|
99
|
-
end
|
100
|
-
|
101
|
-
current_schedule = begin
|
102
|
-
Utility::Cron.quartz_to_crontab(current_schedule)
|
103
|
-
rescue StandardError => e
|
104
|
-
Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
|
105
|
-
return false
|
106
|
-
end
|
107
|
-
cron_parser = Fugit::Cron.parse(current_schedule)
|
108
|
-
|
109
|
-
# Don't sync if the scheduling interval is non-parsable
|
110
|
-
unless cron_parser
|
111
|
-
Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
|
112
|
-
return false
|
113
|
-
end
|
114
|
-
|
115
|
-
# We want to sync when sync never actually happened
|
116
|
-
last_synced = connector_settings[:last_synced]
|
117
|
-
if last_synced.nil? || last_synced.empty?
|
118
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
119
|
-
return true
|
120
|
-
end
|
121
|
-
|
122
|
-
next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
|
123
|
-
|
124
|
-
# Sync if next trigger for the connector is in past
|
125
|
-
if next_trigger_time < Time.now
|
126
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
|
127
|
-
return true
|
128
|
-
end
|
129
|
-
|
130
|
-
false
|
83
|
+
schedule_triggered?(connector_settings.scheduling_settings, connector_settings.formatted)
|
131
84
|
end
|
132
85
|
|
133
86
|
def heartbeat_triggered?(connector_settings)
|
@@ -148,6 +101,12 @@ module Core
|
|
148
101
|
end
|
149
102
|
|
150
103
|
def filtering_validation_triggered?(connector_settings)
|
104
|
+
unless connector_settings.any_filtering_feature_enabled?
|
105
|
+
Utility::Logger.debug("#{connector_settings.formatted} all filtering features are disabled. Skip filtering validation.")
|
106
|
+
|
107
|
+
return false
|
108
|
+
end
|
109
|
+
|
151
110
|
filtering = connector_settings.filtering
|
152
111
|
|
153
112
|
unless filtering.present?
|
@@ -189,5 +148,61 @@ module Core
|
|
189
148
|
false
|
190
149
|
end
|
191
150
|
end
|
151
|
+
|
152
|
+
def schedule_triggered?(scheduling_settings, identifier)
|
153
|
+
# Don't sync if sync is explicitly disabled
|
154
|
+
unless scheduling_settings.present? && scheduling_settings[:enabled] == true
|
155
|
+
Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
|
156
|
+
return false
|
157
|
+
end
|
158
|
+
|
159
|
+
current_schedule = scheduling_settings[:interval]
|
160
|
+
|
161
|
+
# Don't sync if there is no actual scheduling interval
|
162
|
+
if current_schedule.nil? || current_schedule.empty?
|
163
|
+
Utility::Logger.warn("No sync schedule configured for #{identifier}.")
|
164
|
+
return false
|
165
|
+
end
|
166
|
+
|
167
|
+
current_schedule =
|
168
|
+
begin
|
169
|
+
Utility::Cron.quartz_to_crontab(current_schedule)
|
170
|
+
rescue StandardError => e
|
171
|
+
Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
|
172
|
+
return false
|
173
|
+
end
|
174
|
+
cron_parser = Fugit::Cron.parse(current_schedule)
|
175
|
+
|
176
|
+
# Don't sync if the scheduling interval is non-parsable
|
177
|
+
unless cron_parser
|
178
|
+
Utility::Logger.error("Unable to parse sync schedule for #{identifier}: expression #{current_schedule} is not a valid Quartz Cron definition.")
|
179
|
+
return false
|
180
|
+
end
|
181
|
+
|
182
|
+
next_trigger_time = cron_parser.next_time(Time.now)
|
183
|
+
|
184
|
+
# Sync if next trigger happens before the next poll
|
185
|
+
if next_trigger_time <= Time.now + @poll_interval
|
186
|
+
Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{current_schedule}.")
|
187
|
+
return true
|
188
|
+
end
|
189
|
+
|
190
|
+
false
|
191
|
+
end
|
192
|
+
|
193
|
+
def sleep_for_poll_interval
|
194
|
+
if @poll_interval > 0 && !@is_shutting_down
|
195
|
+
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
196
|
+
sleep(@poll_interval)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def log_authorization_error(e)
|
201
|
+
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
202
|
+
end
|
203
|
+
|
204
|
+
def log_standard_error(e)
|
205
|
+
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
206
|
+
end
|
192
207
|
end
|
193
208
|
end
|
data/lib/utility/constants.rb
CHANGED
@@ -16,8 +16,6 @@ module Utility
|
|
16
16
|
JOB_INDEX = '.elastic-connectors-sync-jobs'
|
17
17
|
CONTENT_INDEX_PREFIX = 'search-'
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
|
-
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
|
-
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
19
|
|
22
20
|
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
21
|
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
data/lib/utility/es_client.rb
CHANGED
@@ -43,6 +43,10 @@ module Utility
|
|
43
43
|
configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
|
44
44
|
configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
|
45
45
|
|
46
|
+
# headers
|
47
|
+
# these are necessary for cloud-hosted native connectors
|
48
|
+
configs[:headers] = es_config[:headers].to_h if es_config[:headers]
|
49
|
+
|
46
50
|
# if log or trace is activated, we use the application logger
|
47
51
|
configs[:logger] = if configs[:log] || configs[:trace]
|
48
52
|
Utility::Logger.logger
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.7
|
4
|
+
version: 8.7.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 5.2
|
19
|
+
version: '5.2'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 5.2
|
26
|
+
version: '5.2'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: ecs-logging
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -132,7 +132,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
132
132
|
licenses:
|
133
133
|
- Elastic-2.0
|
134
134
|
metadata:
|
135
|
-
revision:
|
135
|
+
revision: ae6292137eef9acac1259c5e7e71a3d0e149210b
|
136
136
|
repository: https://github.com/elastic/connectors-ruby
|
137
137
|
post_install_message:
|
138
138
|
rdoc_options: []
|