connectors_utility 8.6.0.7 → 8.7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors/crawler/scheduler.rb +36 -0
- data/lib/core/connector_job.rb +4 -5
- data/lib/core/connector_settings.rb +26 -0
- data/lib/core/elastic_connector_actions.rb +32 -4
- data/lib/core/scheduler.rb +66 -51
- data/lib/utility/constants.rb +0 -2
- data/lib/utility/es_client.rb +4 -0
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8bde5d9fcfd7af80dd1a20bc3fdffb3e509af46fc492607fa963858aacdb79bc
|
4
|
+
data.tar.gz: b3f26fba69d08e1add58b476a37a74f3fa855790d9bcea05c7a4f5ed3b1fd9bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1eb4c63b6ae46d11b8b8e01224e1e1943a1971b7e12054978ebab97c99939fb1d0316b3ff06912d4908e8f34df482477e4e8d1f9b53c02fcebc809e27b597d2a
|
7
|
+
data.tar.gz: 2300a3a9c32ed95a1c25a54fba4737bb052caee10756887f2ac114948ec8a8ba0df066ee30420f5e3d64e0f5855df7e487f4782bc242e27ea66b592c6c60dbe6
|
@@ -22,11 +22,47 @@ module Connectors
|
|
22
22
|
[]
|
23
23
|
end
|
24
24
|
|
25
|
+
def when_triggered
|
26
|
+
loop do
|
27
|
+
connector_settings.each do |cs|
|
28
|
+
# crawler only supports :sync
|
29
|
+
if sync_triggered?(cs)
|
30
|
+
yield cs, :sync, nil
|
31
|
+
next
|
32
|
+
end
|
33
|
+
|
34
|
+
schedule_key = custom_schedule_triggered(cs)
|
35
|
+
yield cs, :sync, schedule_key if schedule_key
|
36
|
+
end
|
37
|
+
rescue *Utility::AUTHORIZATION_ERRORS => e
|
38
|
+
log_authorization_error(e)
|
39
|
+
rescue StandardError => e
|
40
|
+
log_standard_error(e)
|
41
|
+
ensure
|
42
|
+
if @is_shutting_down
|
43
|
+
break
|
44
|
+
end
|
45
|
+
sleep_for_poll_interval
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
25
49
|
private
|
26
50
|
|
27
51
|
def connector_registered?(service_type)
|
28
52
|
service_type == 'elastic-crawler'
|
29
53
|
end
|
54
|
+
|
55
|
+
# custom scheduling has no ordering, so the first-found schedule is returned
|
56
|
+
def custom_schedule_triggered(cs)
|
57
|
+
cs.custom_scheduling_settings.each do |key, custom_scheduling|
|
58
|
+
identifier = "#{cs.formatted} - #{custom_scheduling[:name]}"
|
59
|
+
if schedule_triggered?(custom_scheduling, identifier)
|
60
|
+
return key
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
nil
|
65
|
+
end
|
30
66
|
end
|
31
67
|
end
|
32
68
|
end
|
data/lib/core/connector_job.rb
CHANGED
@@ -15,7 +15,7 @@ require 'utility'
|
|
15
15
|
module Core
|
16
16
|
class ConnectorJob
|
17
17
|
DEFAULT_PAGE_SIZE = 100
|
18
|
-
|
18
|
+
IDLE_THRESHOLD = 60
|
19
19
|
|
20
20
|
def self.fetch_by_id(job_id)
|
21
21
|
es_response = ElasticConnectorActions.get_job(job_id)
|
@@ -36,8 +36,7 @@ module Core
|
|
36
36
|
fetch_jobs_by_query(query, page_size)
|
37
37
|
end
|
38
38
|
|
39
|
-
def self.orphaned_jobs(page_size = DEFAULT_PAGE_SIZE)
|
40
|
-
connector_ids = ConnectorSettings.fetch_all_connectors.map(&:id)
|
39
|
+
def self.orphaned_jobs(connector_ids = [], page_size = DEFAULT_PAGE_SIZE)
|
41
40
|
query = { bool: { must_not: { terms: { 'connector.id': connector_ids } } } }
|
42
41
|
fetch_jobs_by_query(query, page_size)
|
43
42
|
end
|
@@ -47,7 +46,7 @@ module Core
|
|
47
46
|
ElasticConnectorActions.delete_jobs_by_query(query)
|
48
47
|
end
|
49
48
|
|
50
|
-
def self.
|
49
|
+
def self.idle_jobs(connector_id = nil, page_size = DEFAULT_PAGE_SIZE)
|
51
50
|
connector_ids = if connector_id
|
52
51
|
[connector_id]
|
53
52
|
else
|
@@ -58,7 +57,7 @@ module Core
|
|
58
57
|
filter: [
|
59
58
|
{ terms: { 'connector.id': connector_ids } },
|
60
59
|
{ terms: { status: Connectors::SyncStatus::ACTIVE_STATUSES } },
|
61
|
-
{ range: { last_seen: { lte: "now-#{
|
60
|
+
{ range: { last_seen: { lte: "now-#{IDLE_THRESHOLD}s" } } }
|
62
61
|
]
|
63
62
|
}
|
64
63
|
}
|
@@ -64,6 +64,24 @@ module Core
|
|
64
64
|
@elasticsearch_response[:_source][property_name]
|
65
65
|
end
|
66
66
|
|
67
|
+
def features
|
68
|
+
self[:features] || {}
|
69
|
+
end
|
70
|
+
|
71
|
+
# .dig version is the modern features way of doing things,
|
72
|
+
# Right-hand of OR operator is legacy features support
|
73
|
+
# When this is fixed with a migration, we can go ahead
|
74
|
+
def filtering_rule_feature_enabled?
|
75
|
+
!!features.dig(:sync_rules, :basic, :enabled) || !!features[:filtering_rules]
|
76
|
+
end
|
77
|
+
def filtering_advanced_config_feature_enabled?
|
78
|
+
!!features.dig(:sync_rules, :advanced, :enabled) || !!features[:filtering_advanced_config]
|
79
|
+
end
|
80
|
+
|
81
|
+
def any_filtering_feature_enabled?
|
82
|
+
filtering_rule_feature_enabled? || filtering_advanced_config_feature_enabled?
|
83
|
+
end
|
84
|
+
|
67
85
|
def index_name
|
68
86
|
self[:index_name]
|
69
87
|
end
|
@@ -88,10 +106,18 @@ module Core
|
|
88
106
|
self[:scheduling]
|
89
107
|
end
|
90
108
|
|
109
|
+
def custom_scheduling_settings
|
110
|
+
self[:custom_scheduling]
|
111
|
+
end
|
112
|
+
|
91
113
|
def sync_now?
|
92
114
|
self[:sync_now] == true
|
93
115
|
end
|
94
116
|
|
117
|
+
def last_synced
|
118
|
+
self[:last_synced]
|
119
|
+
end
|
120
|
+
|
95
121
|
def filtering
|
96
122
|
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
97
123
|
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
@@ -157,12 +157,37 @@ module Core
|
|
157
157
|
)
|
158
158
|
end
|
159
159
|
|
160
|
-
def
|
160
|
+
def update_connector_sync_start(connector_id)
|
161
161
|
doc = connector_with_concurrency_control(connector_id)
|
162
162
|
|
163
|
+
body = {
|
164
|
+
last_sync_status: Connectors::SyncStatus::IN_PROGRESS,
|
165
|
+
last_sync_error: nil,
|
166
|
+
status: Connectors::ConnectorStatus::CONNECTED
|
167
|
+
}
|
168
|
+
|
163
169
|
update_connector_fields(
|
164
170
|
connector_id,
|
165
|
-
|
171
|
+
body,
|
172
|
+
doc[:seq_no],
|
173
|
+
doc[:primary_term]
|
174
|
+
)
|
175
|
+
end
|
176
|
+
|
177
|
+
def update_connector_custom_scheduling_last_synced(connector_id, schedule_key)
|
178
|
+
doc = connector_with_concurrency_control(connector_id)
|
179
|
+
|
180
|
+
body = {
|
181
|
+
:custom_scheduling => {
|
182
|
+
schedule_key => {
|
183
|
+
:last_synced => Time.now
|
184
|
+
}
|
185
|
+
}
|
186
|
+
}
|
187
|
+
|
188
|
+
update_connector_fields(
|
189
|
+
connector_id,
|
190
|
+
body,
|
166
191
|
doc[:seq_no],
|
167
192
|
doc[:primary_term]
|
168
193
|
)
|
@@ -314,9 +339,11 @@ module Core
|
|
314
339
|
# Creation of connector index should be handled by Kibana, this method is only used by ftest.rb
|
315
340
|
def ensure_connectors_index_exists
|
316
341
|
mappings = {
|
342
|
+
:dynamic => false,
|
317
343
|
:properties => {
|
318
344
|
:api_key_id => { :type => :keyword },
|
319
345
|
:configuration => { :type => :object },
|
346
|
+
:custom_schedule => { :type => :object },
|
320
347
|
:description => { :type => :text },
|
321
348
|
:error => { :type => :keyword },
|
322
349
|
:features => {
|
@@ -434,6 +461,7 @@ module Core
|
|
434
461
|
# Creation of job index should be handled by Kibana, this method is only used by ftest.rb
|
435
462
|
def ensure_job_index_exists
|
436
463
|
mappings = {
|
464
|
+
:dynamic => false,
|
437
465
|
:properties => {
|
438
466
|
:cancelation_requested_at => { :type => :date },
|
439
467
|
:canceled_at => { :type => :date },
|
@@ -511,8 +539,8 @@ module Core
|
|
511
539
|
end
|
512
540
|
|
513
541
|
def document_count(index_name)
|
514
|
-
client.indices.refresh(:index => index_name)
|
515
|
-
client.count(:index => index_name)['count']
|
542
|
+
client.indices.refresh(:index => index_name, :ignore_unavailable => true)
|
543
|
+
client.count(:index => index_name, :ignore_unavailable => true)['count']
|
516
544
|
end
|
517
545
|
|
518
546
|
private
|
data/lib/core/scheduler.rb
CHANGED
@@ -44,17 +44,14 @@ module Core
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
rescue *Utility::AUTHORIZATION_ERRORS => e
|
47
|
-
|
47
|
+
log_authorization_error(e)
|
48
48
|
rescue StandardError => e
|
49
|
-
|
49
|
+
log_standard_error(e)
|
50
50
|
ensure
|
51
51
|
if @is_shutting_down
|
52
52
|
break
|
53
53
|
end
|
54
|
-
|
55
|
-
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
56
|
-
sleep(@poll_interval)
|
57
|
-
end
|
54
|
+
sleep_for_poll_interval
|
58
55
|
end
|
59
56
|
end
|
60
57
|
|
@@ -83,51 +80,7 @@ module Core
|
|
83
80
|
return true
|
84
81
|
end
|
85
82
|
|
86
|
-
|
87
|
-
scheduling_settings = connector_settings.scheduling_settings
|
88
|
-
unless scheduling_settings.present? && scheduling_settings[:enabled] == true
|
89
|
-
Utility::Logger.debug("#{connector_settings.formatted.capitalize} scheduling is disabled.")
|
90
|
-
return false
|
91
|
-
end
|
92
|
-
|
93
|
-
current_schedule = scheduling_settings[:interval]
|
94
|
-
|
95
|
-
# Don't sync if there is no actual scheduling interval
|
96
|
-
if current_schedule.nil? || current_schedule.empty?
|
97
|
-
Utility::Logger.warn("No sync schedule configured for #{connector_settings.formatted}.")
|
98
|
-
return false
|
99
|
-
end
|
100
|
-
|
101
|
-
current_schedule = begin
|
102
|
-
Utility::Cron.quartz_to_crontab(current_schedule)
|
103
|
-
rescue StandardError => e
|
104
|
-
Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
|
105
|
-
return false
|
106
|
-
end
|
107
|
-
cron_parser = Fugit::Cron.parse(current_schedule)
|
108
|
-
|
109
|
-
# Don't sync if the scheduling interval is non-parsable
|
110
|
-
unless cron_parser
|
111
|
-
Utility::Logger.error("Unable to parse sync schedule for #{connector_settings.formatted}: expression #{current_schedule} is not a valid Quartz Cron definition.")
|
112
|
-
return false
|
113
|
-
end
|
114
|
-
|
115
|
-
# We want to sync when sync never actually happened
|
116
|
-
last_synced = connector_settings[:last_synced]
|
117
|
-
if last_synced.nil? || last_synced.empty?
|
118
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} has never synced yet, running initial sync.")
|
119
|
-
return true
|
120
|
-
end
|
121
|
-
|
122
|
-
next_trigger_time = cron_parser.next_time(Time.parse(last_synced))
|
123
|
-
|
124
|
-
# Sync if next trigger for the connector is in past
|
125
|
-
if next_trigger_time < Time.now
|
126
|
-
Utility::Logger.info("#{connector_settings.formatted.capitalize} sync is triggered by cron schedule #{current_schedule}.")
|
127
|
-
return true
|
128
|
-
end
|
129
|
-
|
130
|
-
false
|
83
|
+
schedule_triggered?(connector_settings.scheduling_settings, connector_settings.formatted)
|
131
84
|
end
|
132
85
|
|
133
86
|
def heartbeat_triggered?(connector_settings)
|
@@ -148,6 +101,12 @@ module Core
|
|
148
101
|
end
|
149
102
|
|
150
103
|
def filtering_validation_triggered?(connector_settings)
|
104
|
+
unless connector_settings.any_filtering_feature_enabled?
|
105
|
+
Utility::Logger.debug("#{connector_settings.formatted} all filtering features are disabled. Skip filtering validation.")
|
106
|
+
|
107
|
+
return false
|
108
|
+
end
|
109
|
+
|
151
110
|
filtering = connector_settings.filtering
|
152
111
|
|
153
112
|
unless filtering.present?
|
@@ -189,5 +148,61 @@ module Core
|
|
189
148
|
false
|
190
149
|
end
|
191
150
|
end
|
151
|
+
|
152
|
+
def schedule_triggered?(scheduling_settings, identifier)
|
153
|
+
# Don't sync if sync is explicitly disabled
|
154
|
+
unless scheduling_settings.present? && scheduling_settings[:enabled] == true
|
155
|
+
Utility::Logger.debug("#{identifier.capitalize} scheduling is disabled.")
|
156
|
+
return false
|
157
|
+
end
|
158
|
+
|
159
|
+
current_schedule = scheduling_settings[:interval]
|
160
|
+
|
161
|
+
# Don't sync if there is no actual scheduling interval
|
162
|
+
if current_schedule.nil? || current_schedule.empty?
|
163
|
+
Utility::Logger.warn("No sync schedule configured for #{identifier}.")
|
164
|
+
return false
|
165
|
+
end
|
166
|
+
|
167
|
+
current_schedule =
|
168
|
+
begin
|
169
|
+
Utility::Cron.quartz_to_crontab(current_schedule)
|
170
|
+
rescue StandardError => e
|
171
|
+
Utility::ExceptionTracking.log_exception(e, "Unable to convert quartz (#{current_schedule}) to crontab.")
|
172
|
+
return false
|
173
|
+
end
|
174
|
+
cron_parser = Fugit::Cron.parse(current_schedule)
|
175
|
+
|
176
|
+
# Don't sync if the scheduling interval is non-parsable
|
177
|
+
unless cron_parser
|
178
|
+
Utility::Logger.error("Unable to parse sync schedule for #{identifier}: expression #{current_schedule} is not a valid Quartz Cron definition.")
|
179
|
+
return false
|
180
|
+
end
|
181
|
+
|
182
|
+
next_trigger_time = cron_parser.next_time(Time.now)
|
183
|
+
|
184
|
+
# Sync if next trigger happens before the next poll
|
185
|
+
if next_trigger_time <= Time.now + @poll_interval
|
186
|
+
Utility::Logger.info("#{identifier.capitalize} sync is triggered by cron schedule #{current_schedule}.")
|
187
|
+
return true
|
188
|
+
end
|
189
|
+
|
190
|
+
false
|
191
|
+
end
|
192
|
+
|
193
|
+
def sleep_for_poll_interval
|
194
|
+
if @poll_interval > 0 && !@is_shutting_down
|
195
|
+
Utility::Logger.debug("Sleeping for #{@poll_interval} seconds in #{self.class}.")
|
196
|
+
sleep(@poll_interval)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def log_authorization_error(e)
|
201
|
+
Utility::ExceptionTracking.log_exception(e, 'Could not retrieve connectors settings due to authorization error.')
|
202
|
+
end
|
203
|
+
|
204
|
+
def log_standard_error(e)
|
205
|
+
Utility::ExceptionTracking.log_exception(e, 'Sync failed due to unexpected error.')
|
206
|
+
end
|
192
207
|
end
|
193
208
|
end
|
data/lib/utility/constants.rb
CHANGED
@@ -16,8 +16,6 @@ module Utility
|
|
16
16
|
JOB_INDEX = '.elastic-connectors-sync-jobs'
|
17
17
|
CONTENT_INDEX_PREFIX = 'search-'
|
18
18
|
CRAWLER_SERVICE_TYPE = 'elastic-crawler'
|
19
|
-
FILTERING_RULES_FEATURE = 'filtering_rules'
|
20
|
-
FILTERING_ADVANCED_FEATURE = 'filtering_advanced_config'
|
21
19
|
|
22
20
|
# Maximum number of operations in BULK Elasticsearch operation that will ingest the data
|
23
21
|
DEFAULT_MAX_INGESTION_QUEUE_SIZE = 500
|
data/lib/utility/es_client.rb
CHANGED
@@ -43,6 +43,10 @@ module Utility
|
|
43
43
|
configs[:transport_options] = es_config[:transport_options] if es_config[:transport_options]
|
44
44
|
configs[:ca_fingerprint] = es_config[:ca_fingerprint] if es_config[:ca_fingerprint]
|
45
45
|
|
46
|
+
# headers
|
47
|
+
# these are necessary for cloud-hosted native connectors
|
48
|
+
configs[:headers] = es_config[:headers].to_h if es_config[:headers]
|
49
|
+
|
46
50
|
# if log or trace is activated, we use the application logger
|
47
51
|
configs[:logger] = if configs[:log] || configs[:trace]
|
48
52
|
Utility::Logger.logger
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.
|
4
|
+
version: 8.7.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 5.2
|
19
|
+
version: '5.2'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 5.2
|
26
|
+
version: '5.2'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: ecs-logging
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -132,7 +132,7 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
132
132
|
licenses:
|
133
133
|
- Elastic-2.0
|
134
134
|
metadata:
|
135
|
-
revision:
|
135
|
+
revision: ae6292137eef9acac1259c5e7e71a3d0e149210b
|
136
136
|
repository: https://github.com/elastic/connectors-ruby
|
137
137
|
post_install_message:
|
138
138
|
rdoc_options: []
|