connectors_utility 8.6.0.6 → 8.6.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/connectors/job_trigger_method.rb +14 -0
- data/lib/connectors_utility.rb +6 -3
- data/lib/core/connector_job.rb +13 -1
- data/lib/core/connector_settings.rb +7 -12
- data/lib/core/elastic_connector_actions.rb +4 -1
- data/lib/core/scheduler.rb +1 -1
- data/lib/utility/error_monitor.rb +26 -5
- data/lib/utility/filtering.rb +4 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d2406276300699ebf07cc2890f994c3a1a9fecb9127793bcad68fb69a4f49111
|
4
|
+
data.tar.gz: 0a35b249367837148c2f7fe2eac30f74de37c5fd5610f30859c703bfbcbb76be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e0cca6acc1faffd3312213d807e77425ed006f44313c3b49faf01c8f3b31817ef7a194249581d6058cc8309773fa1142f5daf8994a2a8e24533f1b3f08f73efd
|
7
|
+
data.tar.gz: '078ebf6f05133d0b3ffe252157e72bcfaee89fb98e65d8ffde1a400c69a95f1ea4edfd7dfd66a626955aa24a0e29217cb6f51ead3e30c1c1d2792d52c7ffbb33'
|
data/lib/connectors/job_trigger_method.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#
|
2
|
+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
3
|
+
# or more contributor license agreements. Licensed under the Elastic License;
|
4
|
+
# you may not use this file except in compliance with the Elastic License.
|
5
|
+
#
|
6
|
+
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module Connectors
|
10
|
+
class JobTriggerMethod
|
11
|
+
ON_DEMAND = 'on_demand'
|
12
|
+
SCHEDULED = 'scheduled'
|
13
|
+
end
|
14
|
+
end
|
data/lib/connectors_utility.rb
CHANGED
@@ -9,8 +9,11 @@
|
|
9
9
|
require_relative 'utility'
|
10
10
|
|
11
11
|
require_relative 'connectors/connector_status'
|
12
|
+
require_relative 'connectors/crawler/scheduler'
|
13
|
+
require_relative 'connectors/job_trigger_method'
|
12
14
|
require_relative 'connectors/sync_status'
|
13
|
-
require_relative 'core/
|
15
|
+
require_relative 'core/connector_job'
|
16
|
+
require_relative 'core/connector_settings'
|
14
17
|
require_relative 'core/elastic_connector_actions'
|
15
|
-
|
16
|
-
require_relative '
|
18
|
+
require_relative 'core/filtering/validation_status'
|
19
|
+
require_relative 'core/scheduler'
|
data/lib/core/connector_job.rb
CHANGED
@@ -142,7 +142,19 @@ module Core
|
|
142
142
|
end
|
143
143
|
|
144
144
|
def pipeline
|
145
|
-
connector_snapshot[:pipeline]
|
145
|
+
connector_snapshot[:pipeline] || {}
|
146
|
+
end
|
147
|
+
|
148
|
+
def extract_binary_content?
|
149
|
+
pipeline[:extract_binary_content]
|
150
|
+
end
|
151
|
+
|
152
|
+
def reduce_whitespace?
|
153
|
+
pipeline[:reduce_whitespace]
|
154
|
+
end
|
155
|
+
|
156
|
+
def run_ml_inference?
|
157
|
+
pipeline[:run_ml_inference]
|
146
158
|
end
|
147
159
|
|
148
160
|
def connector
|
data/lib/core/connector_settings.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
10
10
|
require 'connectors/connector_status'
|
11
|
+
require 'connectors/sync_status'
|
11
12
|
require 'core/elastic_connector_actions'
|
12
13
|
require 'utility'
|
13
14
|
|
@@ -87,6 +88,10 @@ module Core
|
|
87
88
|
self[:scheduling]
|
88
89
|
end
|
89
90
|
|
91
|
+
def sync_now?
|
92
|
+
self[:sync_now] == true
|
93
|
+
end
|
94
|
+
|
90
95
|
def filtering
|
91
96
|
# assume for now, that first object in filtering array or a filter object itself is the only filtering object
|
92
97
|
filtering = @elasticsearch_response.dig(:_source, :filtering)
|
@@ -98,18 +103,6 @@ module Core
|
|
98
103
|
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :name), @connectors_meta.dig(:pipeline, :default_name), DEFAULT_REQUEST_PIPELINE)
|
99
104
|
end
|
100
105
|
|
101
|
-
def extract_binary_content?
|
102
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :extract_binary_content), @connectors_meta.dig(:pipeline, :default_extract_binary_content), DEFAULT_EXTRACT_BINARY_CONTENT)
|
103
|
-
end
|
104
|
-
|
105
|
-
def reduce_whitespace?
|
106
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :reduce_whitespace), @connectors_meta.dig(:pipeline, :default_reduce_whitespace), DEFAULT_REDUCE_WHITESPACE)
|
107
|
-
end
|
108
|
-
|
109
|
-
def run_ml_inference?
|
110
|
-
Utility::Common.return_if_present(@elasticsearch_response.dig(:_source, :pipeline, :run_ml_inference), @connectors_meta.dig(:pipeline, :default_run_ml_inference), DEFAULT_RUN_ML_INFERENCE)
|
111
|
-
end
|
112
|
-
|
113
106
|
def formatted
|
114
107
|
properties = ["ID: #{id}"]
|
115
108
|
properties << "Service type: #{service_type}" if service_type
|
@@ -139,10 +132,12 @@ module Core
|
|
139
132
|
job_status = job&.status || Connectors::SyncStatus::ERROR
|
140
133
|
job_error = job.nil? ? 'Could\'t find the job' : job.error
|
141
134
|
job_error ||= 'unknown error' if job_status == Connectors::SyncStatus::ERROR
|
135
|
+
connector_status = (job_status == Connectors::SyncStatus::ERROR ? Connectors::ConnectorStatus::ERROR : Connectors::ConnectorStatus::CONNECTED)
|
142
136
|
doc = {
|
143
137
|
:last_sync_status => job_status,
|
144
138
|
:last_synced => Time.now,
|
145
139
|
:last_sync_error => job_error,
|
140
|
+
:status => connector_status,
|
146
141
|
:error => job_error
|
147
142
|
}
|
148
143
|
if job&.terminated?
|
data/lib/core/elastic_connector_actions.rb
CHANGED
@@ -8,6 +8,7 @@
|
|
8
8
|
#
|
9
9
|
require 'active_support/core_ext/hash'
|
10
10
|
require 'connectors/connector_status'
|
11
|
+
require 'connectors/job_trigger_method'
|
11
12
|
require 'connectors/sync_status'
|
12
13
|
require 'utility'
|
13
14
|
require 'elastic-transport'
|
@@ -189,13 +190,15 @@ module Core
|
|
189
190
|
status: Connectors::SyncStatus::PENDING,
|
190
191
|
created_at: Time.now,
|
191
192
|
last_seen: Time.now,
|
193
|
+
trigger_method: connector_settings.sync_now? ? Connectors::JobTriggerMethod::ON_DEMAND : Connectors::JobTriggerMethod::SCHEDULED,
|
192
194
|
connector: {
|
193
195
|
id: connector_settings.id,
|
194
196
|
filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
|
195
197
|
index_name: connector_settings.index_name,
|
196
198
|
language: connector_settings[:language],
|
197
199
|
pipeline: connector_settings[:pipeline],
|
198
|
-
service_type: connector_settings.service_type
|
200
|
+
service_type: connector_settings.service_type,
|
201
|
+
configuration: connector_settings.configuration
|
199
202
|
}
|
200
203
|
}
|
201
204
|
|
data/lib/core/scheduler.rb
CHANGED
@@ -78,7 +78,7 @@ module Core
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# Sync when sync_now flag is true for the connector
|
81
|
-
if connector_settings
|
81
|
+
if connector_settings.sync_now?
|
82
82
|
Utility::Logger.info("#{connector_settings.formatted.capitalize} is manually triggered to sync now.")
|
83
83
|
return true
|
84
84
|
end
|
data/lib/utility/error_monitor.rb
CHANGED
@@ -51,7 +51,7 @@ module Utility
|
|
51
51
|
def note_success
|
52
52
|
@consecutive_error_count = 0
|
53
53
|
@success_count += 1
|
54
|
-
|
54
|
+
track_window_error(false)
|
55
55
|
end
|
56
56
|
|
57
57
|
def note_error(error, id: Time.now.to_i)
|
@@ -60,10 +60,9 @@ module Utility
|
|
60
60
|
Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
|
61
61
|
@total_error_count += 1
|
62
62
|
@consecutive_error_count += 1
|
63
|
-
@window_errors[@window_index] = true
|
64
63
|
@error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
|
65
64
|
@error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
|
66
|
-
|
65
|
+
track_window_error(true)
|
67
66
|
@last_error = error
|
68
67
|
|
69
68
|
raise_if_necessary
|
@@ -92,10 +91,32 @@ module Utility
|
|
92
91
|
end
|
93
92
|
|
94
93
|
def num_errors_in_window
|
95
|
-
@window_errors.count(
|
94
|
+
@window_errors.count(true).to_f
|
96
95
|
end
|
97
96
|
|
98
|
-
def
|
97
|
+
def track_window_error(is_error)
|
98
|
+
# We keep the errors array of the size @window_size this way, imagine @window_size = 5
|
99
|
+
# Error array inits as falses:
|
100
|
+
# [ false, false, false, false, false ]
|
101
|
+
# Third document raises an error:
|
102
|
+
# [ false, false, true, false, false ]
|
103
|
+
# ^^^^
|
104
|
+
# 2 % 5 == 2
|
105
|
+
# Fifth document raises an error:
|
106
|
+
# [ false, false, true, false, true ]
|
107
|
+
# ^^^^
|
108
|
+
# 4 % 5 == 4
|
109
|
+
# Sixth document raises an error:
|
110
|
+
# [ true, false, true, false, true ]
|
111
|
+
# ^^^^
|
112
|
+
# 5 % 5 == 0
|
113
|
+
#
|
114
|
+
# Eighth document is successful:
|
115
|
+
# [ true, false, false, false, true ]
|
116
|
+
# ^^^^^
|
117
|
+
# 7 % 5 == 2
|
118
|
+
# And so on.
|
119
|
+
@window_errors[@window_index] = is_error
|
99
120
|
@window_index = (@window_index + 1) % @window_size
|
100
121
|
end
|
101
122
|
|
data/lib/utility/filtering.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: connectors_utility
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 8.6.0.6
|
4
|
+
version: 8.6.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-11-
|
11
|
+
date: 2022-11-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -104,6 +104,7 @@ files:
|
|
104
104
|
- NOTICE.txt
|
105
105
|
- lib/connectors/connector_status.rb
|
106
106
|
- lib/connectors/crawler/scheduler.rb
|
107
|
+
- lib/connectors/job_trigger_method.rb
|
107
108
|
- lib/connectors/sync_status.rb
|
108
109
|
- lib/connectors_utility.rb
|
109
110
|
- lib/core/connector_job.rb
|
@@ -131,8 +132,8 @@ homepage: https://github.com/elastic/connectors-ruby
|
|
131
132
|
licenses:
|
132
133
|
- Elastic-2.0
|
133
134
|
metadata:
|
134
|
-
revision:
|
135
|
-
repository:
|
135
|
+
revision: 86b6c162f41d3a837841d0df2430cbc26b57eb33
|
136
|
+
repository: https://github.com/elastic/connectors-ruby
|
136
137
|
post_install_message:
|
137
138
|
rdoc_options: []
|
138
139
|
require_paths:
|